简单整理两个绘制热图R包的用法,分别是基础的pheatmap包与复杂的ComplexHeatmap包。

pheatmap

1
2
3
4
5
# install.packages("pheatmap")
library(pheatmap)

packageVersion("pheatmap")
# [1] ‘1.0.12’

0、 示例数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
# Build a 30 x 10 example expression matrix (rows = genes, columns = samples).
# The seed is set once, before the first random draw, so that every value is
# reproducible; the three draws below are consecutive and independent.
set.seed(123)
exp <- matrix(rnorm(300), nrow = 30, ncol = 10)
# Raise Gene1-15 in Sample1-5 by noise centred on 4.
exp[1:15, 1:5] <- exp[1:15, 1:5] + matrix(rnorm(75, mean = 4), nrow = 15, ncol = 5)
# Raise Gene16-30 in Sample6-10 by noise centred on 3.
exp[16:30, 6:10] <- exp[16:30, 6:10] + matrix(rnorm(75, mean = 3), nrow = 15, ncol = 5)
exp <- round(exp, 2)
colnames(exp) <- paste0("Sample", 1:10)
rownames(exp) <- paste0("Gene", 1:30)
dim(exp)
# [1] 30 10
head(exp)
# Illustrative output from an earlier run (exact values depend on the random draw):
#       Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 Sample7 Sample8 Sample9 Sample10
# Gene1    4.47    5.74    5.56    3.18    6.38    0.06   -0.09    0.13    0.70    -1.75
# Gene2    3.49    3.71    2.24    4.23    4.10   -0.70    1.08    0.22   -0.11     0.10
# Gene3    4.34    0.37    5.64    3.05    2.42   -0.72    0.63    1.64   -1.26    -0.57
# Gene4    4.25    4.32    6.79    5.30    2.37    0.88   -0.11   -0.22    1.68    -0.97
# Gene5    3.99    4.45    3.38    4.29    1.74   -1.02   -1.53    0.17    0.91    -0.18
# Gene6    5.72    2.36    5.39    4.04    6.50    1.96   -0.52    1.17    0.24     1.01
## Basic heatmap using only pheatmap's default settings
pheatmap(mat = exp)
image-20220612134929311

1、聚类相关参数

  • 下述均是针对行row的操作,改为col即为针对列的操作。
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
## (1) Do not cluster rows (argument spelled out in full: cluster_rows)
pheatmap(exp, cluster_rows = FALSE)

## (2) Cluster rows but hide the dendrogram (tree height set to 0)
pheatmap(exp, treeheight_row = 0)

## (3) Distance measure used when clustering rows (argument template)
clustering_distance_rows = "euclidean" # or "correlation"

## (4) Linkage method used to merge clusters (argument template)
clustering_method = "average" # "ward.D2", "single", "complete", ...

## (5) Recover the expression matrix in the row/column order shown in the heatmap
ph <- pheatmap(exp)
ph$tree_row$order
ph$tree_col$order
ph_exp <- exp[ph$tree_row$order, ph$tree_col$order]
ph_exp[1:4, 1:4]
# Illustrative output (exact values depend on the data in exp):
#        Sample3 Sample5 Sample2 Sample1
# Gene26    1.05   -0.57    1.44   -0.71
# Gene19   -1.01   -0.52   -0.26   -0.63
# Gene24    0.98   -1.24   -0.96   -0.24
# Gene18    0.33    1.23   -0.49    0.24

2、颜色相关

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Default palette: reversed 7-class "RdYlBu", interpolated to 100 steps
colours <- colorRampPalette(
  rev(RColorBrewer::brewer.pal(n = 7, name = "RdYlBu"))
)(100)
str(colours)
# chr [1:100] "#4575B4" "#4979B6" "#4E7DB8" "#5282BB" "#5786BD" "#5C8BBF" "#608FC2" ...

# Custom palette: 10 steps from navy through white to firebrick3
colours <- colorRampPalette(c("navy", "white", "firebrick3"))(10)
# Alternative palette using hex codes:
#colours = colorRampPalette(c("#3288bd", "white", "#d53e4f"))(10)
str(colours)
# chr [1:10] "#000080" ...   (the first colour is navy itself)
pheatmap(exp, color = colours)

image-20220612135430912

3、行列注释

通过 annotation_col= 与 annotation_row= 参数分别提供列注释与行注释的 data.frame:

  • annotation_row 的行名需与表达矩阵的行名相同,annotation_col 的行名需与表达矩阵的列名相同;
  • 列的内容为分组信息。
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
# Column annotation: one row per sample; its row names must equal colnames(exp)
annotation_col <- data.frame(
  group = rep(c("Group_A", "Group_B"), times = c(5, 5)),
  row.names = colnames(exp)
)
head(annotation_col)

# Row annotation: one row per gene; its row names must equal rownames(exp)
annotation_row <- data.frame(
  Type = rep(c("Up", "Down"), times = c(15, 15)),
  row.names = rownames(exp)
)
head(annotation_row)

pheatmap(
  exp,
  annotation_col = annotation_col,
  annotation_row = annotation_row
)
image-20220612135633376
1
2
3
4
5
6
7
8
# Custom colours for the annotation tracks: one named vector per annotation
# column, with names matching that column's levels
ann_colors <- list(
  group = c(Group_A = "#e66101", Group_B = "#5e3c99"),
  Type  = c(Up = "#e7298a", Down = "#66a61e")
)
pheatmap(
  exp,
  annotation_col = annotation_col,
  annotation_row = annotation_row,
  annotation_colors = ann_colors
)

4、热图格子相关

(1)格子边框

  • 格子的边框颜色border_color=;无边框时设置border_color = NA
  • 格子的长宽cellwidth = 15, cellheight = 12
1
2
# White cell borders with fixed 9 x 9 cells
pheatmap(
  exp,
  border_color = "white",
  cellwidth = 9,
  cellheight = 9
)

(2)文本填充

1
2
3
4
5
6
7
8
9
# By default the numbers printed in the cells are the plotted matrix values
pheatmap(
  exp,
  display_numbers = TRUE,
  number_color = "blue",
  number_format = "%.1e"   # default is "%.2f"
)

# Custom cell text: print the value only where it exceeds 5
pheatmap(
  exp,
  display_numbers = matrix(ifelse(exp > 5, exp, ""), nrow = nrow(exp))
)
# Custom cell text: mark cells above 5 with an asterisk
pheatmap(
  exp,
  display_numbers = matrix(ifelse(exp > 5, "*", ""), nrow = nrow(exp))
)

(3) 切割热图

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# (1) Cut the heatmap into a given number of groups based on the dendrograms
pheatmap(exp, cutree_cols = 2, cutree_rows = 4)

# (2) Insert gaps after given row/column positions; clustering must be off
pheatmap(
  exp,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  gaps_row = c(10, 20),
  gaps_col = 5
)
image-20220612140449341

5、行列名的调整

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# (1) Hide both row names and column names
pheatmap(exp, show_rownames = FALSE, show_colnames = FALSE)

# (2) Rotate the column names; angle_col must be one of
#     "270", "0", "45", "90", "315". Row names cannot be rotated.
pheatmap(exp, angle_col = 45)

# (3) Font sizes of row and column names
pheatmap(exp, fontsize_row = 12, fontsize_col = 8)

# (4) Show only selected row names.
# labels_row replaces rownames(exp) and is therefore given in the row order of
# the input matrix; pheatmap reorders the labels together with the rows when it
# clusters. Build the label vector from the names themselves instead of
# hard-coding positions taken from one particular clustering result.
genes_to_label <- c("Gene1", "Gene2", "Gene3")
labels_row <- rownames(exp)
labels_row[!labels_row %in% genes_to_label] <- ""
pheatmap(exp, labels_row = labels_row)
image-20220612141516869

6、其它

(1)按行按列归一化

  • 默认按照原始值可视化。

  • 可通过scale = c("row", "column")参数设置按照行或者列进行归一化之后的结果绘图

1
2
3
4
5
6
# Plot row-wise scaled values instead of the raw values
pheatmap(mat = exp, scale = "row")
# Row-wise z-score, shown for reference (presumably what scale = "row" applies):
# scale_rows = function(x){
#   m = apply(x, 1, mean, na.rm = T)
#   s = apply(x, 1, sd, na.rm = T)
#   return((x - m) / s)
# }

(2)转为ggplot2对象

1
2
3
4
# Wrap the pheatmap result as a ggplot object so ggplot2 layers can be added
library(ggplot2)
library(ggplotify)
g <- as.ggplot(plot = pheatmap(exp))
g + ggtitle(label = "This is a ggplot object")
image-20220612141745082

ComplexHeatmap

ComplexHeatmap包绘制热图的主要函数为Heatmap();其次也提供了pheatmap()函数可实现上述的所有功能。

https://jokergoo.github.io/ComplexHeatmap-reference/book/

1、基础

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# BiocManager::install("ComplexHeatmap")
library(ComplexHeatmap)
Heatmap(exp)
# name sets the legend title and also serves as the heatmap's unique identifier
Heatmap(exp, name = "Test")

# Capture the drawn heatmap as a grob and wrap it as a ggplot object,
# which can then be combined with other plots
hp <- Heatmap(exp)
grob <- grid::grid.grabExpr(draw(hp))
g <- ggplotify::as.ggplot(grob)

2、设置颜色

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
## (1) Continuous values
library(circlize)
# col_fun = colorRamp2(c(min(exp), 0, max(exp)), c("green", "white", "red"))
col_fun <- colorRamp2(c(-5, 0, 5), c("green", "white", "red"))
## Values below -5 are all drawn green and values above 5 all red, which keeps
## outliers from stretching the colour scale
## na_col = "black" can be used to set the colour of missing values separately
Heatmap(exp, name = "Test", col = col_fun)

## (2) Discrete values
mat <- matrix(sample(LETTERS[1:6], 15, replace = TRUE),
              nrow = 5, byrow = TRUE,
              dimnames = list(c("Row-1", "Row-2", "Row-3", "Row-4", "Row-5"),
                              c("Col-1", "Col-2", "Col-3")))
# Named colour vector: one colour per possible letter
colors <- RColorBrewer::brewer.pal(n = 6, name = "Set1")
names(colors) <- LETTERS[1:6]
colors
# A         B         C         D         E         F 
# "#E41A1C" "#377EB8" "#4DAF4A" "#984EA3" "#FF7F00" "#FFFF33" 
Heatmap(mat, name = "Test", col = colors)

# rect_gp = gpar(col = "white", lwd = 1)
# sets the colour and width of the cell borders
image-20220612150612689

3、设置行列标题

如下为column的演示,row的参数对应相同

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
# Argument templates for Heatmap(); each line shows one argument and a sample value.
column_title = "This is a column title"   # title text
column_title_side = c("top", "bottom")    # title position (the two options)
# title formatting
column_title_gp = gpar(fontsize = 20,     # font size
                       fontface = "bold", # font face
                       col = "white",     # text colour
                       fill = "red",      # fill colour
                       border = "blue")   # border colour
column_title_rot = 0                      # rotation angle; horizontal/vertical supported

# Example call using some of the arguments above
Heatmap(exp, name = "Test",
        column_title = "This is a column title",
        column_title_side = "top",
        column_title_gp = gpar(fontsize = 20))

# When a fill colour is set, configure the following option beforehand so that
# the title is centred; the second line resets the options afterwards
# ht_opt$TITLE_PADDING = unit(c(8.5, 8.5), "points")
# ht_opt(RESET = TRUE)

4、设置行名与列名

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# Argument templates for Heatmap(); each line shows one argument and a sample value.
show_row_names = FALSE                        # whether to show row names
row_names_side = "left"                       # which side to draw them on
row_names_gp = gpar(fontsize = 20,            # font size
                    col = c(rep("red", 10),
                            rep("blue", 20))) # one colour per row (exp has 30 rows)
row_names_rot = -45                           # rotation angle
row_names_centered = TRUE                     # whether to centre-align
row_labels = NULL                             # replacement labels to display


# Replacement labels, named by the original row names so they can be looked up
row_labels = paste0("Row-", 1:30)
names(row_labels) = rownames(exp)

Heatmap(exp, name = "Test",
        row_names_rot = -30,
        row_labels = row_labels[rownames(exp)])
image-20220612155402956

5、聚类相关

如下为column的演示,row的参数对应相同

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
# Argument templates for Heatmap() (column versions shown).
# Do not cluster columns
cluster_columns  = FALSE
# Do not show the column dendrogram
show_column_dend = FALSE
# Side on which the dendrogram is drawn
column_dend_side = "bottom"
# Height of the dendrogram
column_dend_height = unit(4, "cm")
# Distance measure used for clustering; can be customised
clustering_distance_columns = "euclidean"  #default
# Clustering (linkage) method
clustering_method_columns = "complete"     #default
# Set the column order manually; this automatically turns off the corresponding clustering
column_order = 10:1

6、分割热图

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
# Argument templates for splitting a Heatmap().
# Alternative: a factor, whose levels give the order of the slices
# column_split = factor(rep(c("GroupA", "GroupB"), each=5),
#                       levels = c("GroupB", "GroupA"))
column_split = rep(c("GroupA", "GroupB"), each=5)  # grouping variable as a character vector

column_title = c("AA", "BB")                       # custom slice titles
column_title = NULL                                # or: remove the slice titles
column_gap = unit(5, "mm")                         # width of the gap between slices

## By default clustering is still performed: first within each slice, then between slices
show_parent_dend_line = FALSE
## Cluster only within slices; do not cluster between slices
cluster_column_slices = FALSE

row_km = 2      # split rows by k-means clustering
row_split = 2   # split rows by cutting the original dendrogram
image-20220612163429421

7、其它

(1)文本填充

1
2
3
4
5
6
7
# Print the value inside each cell, but only for positive values
Heatmap(
  exp,
  name = "Test",
  cell_fun = function(j, i, x, y, width, height, fill) {
    if (exp[i, j] > 0) {
      # text to print, positioned at the cell's (x, y) location
      grid.text(sprintf("%.1f", exp[i, j]), x, y, gp = gpar(fontsize = 10))
    }
  }
)
image-20220612164034136

(2)图的大小

1
2
3
4
5
6
# Argument templates for Heatmap().
# Size of the whole plot
heatmap_width = unit(8, "cm")
heatmap_height = unit(8, "cm")
# Size of the heatmap itself
width = unit(8, "cm")
height = unit(8, "cm")

(3)从热图对象中获取信息

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# Row order after clustering: draw() returns the laid-out heatmap, from which
# row_order() extracts the row indices
ht <- Heatmap(exp)
ht2 <- draw(ht)
row_order(ht2)
# Illustrative output (depends on the data in exp):
#[1]  6  1  7  4 14  9 10  8 15 11  2 13  5 12  3 26 24 19 18 21 29 25 30 22 17 16 20 23 28 27

# With a k-means split, row_order() returns one index vector per slice.
# k-means starts from random centres, so fix the seed to get the same
# partition on every run.
set.seed(123)
ht  <- Heatmap(exp, row_km = 3)
ht2 <- draw(ht)
row_order(ht2)
# Illustrative output (slice membership depends on the data and the seed):
# $`3`
# [1]  6  1  7  4 14  9 10  8 15 11  2 13  5 12  3
# 
# $`2`
# [1] 29 26 24 19 25
# 
# $`1`
# [1] 18 21 22 17 20 23 28 16 30 27

8、热图注释anno

(1) simple_anno

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Small 6 x 6 demo matrix with rows R1..R6 and columns C1..C6
set.seed(123)
mat <- matrix(rnorm(36), 6)
dimnames(mat) <- list(paste0("R", 1:6), paste0("C", 1:6))


library(circlize)
col_fun <- colorRamp2(c(-1, 0, 1), c("blue", "white", "red"))
# Two simple column annotations: one continuous, one discrete
column_ha <- HeatmapAnnotation(
  Anno1 = rnorm(6),
  Anno2 = rep(c("A", "B"), each = 3),
  # colours: a colour function for Anno1, a named vector for Anno2
  col = list(
    Anno1 = col_fun,
    Anno2 = c("A" = "red", "B" = "green")
  ),
  # size of each simple annotation track
  simple_anno_size = unit(1, "cm")
)
Heatmap(mat, name = "mat", top_annotation = column_ha)
# Annotations can be attached with any of:
# top_annotation
# bottom_annotation
# left_annotation
# right_annotation
image-20220612171936555
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
# Simulated p-values, one per column of the 6-column demo matrix
set.seed(123)
pvalue <- 10^(-runif(6, min = 0, max = 3))
# One symbol per p-value: "*" where p <= 0.01, NA (no symbol) otherwise.
# The vector is sized from pvalue so the two always have the same length.
pch <- rep("*", length(pvalue))
pch[pvalue > 0.01] <- NA
# [1] NA  "*" NA  "*" "*" NA
# Colour scale on the -log10(p) axis
pvalue_col_fun <- colorRamp2(c(0, 2, 3), c("green", "white", "red"))
# Simple annotation coloured by -log10(p), with the pch symbols drawn on top
column_ha <- HeatmapAnnotation(
  pvalue = anno_simple(-log10(pvalue), col = pvalue_col_fun, pch = pch)
)
Heatmap(mat, name = "mat", top_annotation = column_ha)
# anno_simple() annotations need their legends built by hand
lgd_pvalue <- Legend(
  title = "p-value",
  col_fun = pvalue_col_fun,
  at = c(0, 1, 2, 3),
  labels = c("1", "0.1", "0.01", "0.001")
)
lgd_sig <- Legend(pch = "*", type = "points", labels = "< 0.01")
draw(
  Heatmap(mat, name = "mat", top_annotation = column_ha, column_split = 2),
  annotation_legend_list = list(lgd_pvalue, lgd_sig)
)

image-20220612172311143

(2) anno_block

anno_block 在分割热图时,注释每一个分组信息

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# Block annotation: one coloured, labelled block per column slice
ha <- HeatmapAnnotation(
  Anno1 = anno_block(
    gp = gpar(fill = c("Red", "Green")),
    labels = c("Group1", "Group2"),
    labels_gp = gpar(col = "white", fontsize = 15)
  )
)
# Split the columns into two slices; hide the slice titles and the dendrogram
Heatmap(
  mat,
  column_split = 2,
  column_title = NULL,
  show_column_dend = FALSE,
  top_annotation = ha
)
image-20220612174239188

(3) anno_barplot

还有很多其它anno_*系列函数

https://jokergoo.github.io/ComplexHeatmap-reference/book/heatmap-annotations.html

以anno_barplot()为例

1
2
3
4
5
6
# Open the help page for anno_barplot()
?anno_barplot
# Row annotation drawn as a bar plot, with bars starting from 0
ha <- rowAnnotation(Anno1 = anno_barplot(rnorm(6), baseline = 0))
Heatmap(mat, right_annotation = ha)

image-20220612174457728

(4) anno_mark

1
2
3
4
5
6
7
# Mark annotation: label only selected rows, given by their row indices in exp
ha <- rowAnnotation(
  foo = anno_mark(
    at = c(1:4, 20, 29:30),
    labels = paste0("Gene", c(1:4, 20, 29:30))
  )
)
Heatmap(
  exp,
  name = "mat",
  cluster_rows = FALSE,
  right_annotation = ha,
  row_names_side = "left",
  row_names_gp = gpar(fontsize = 4)
)
image-20220612174850855

9、设置legend

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
## (1) Legend for continuous values: custom title, break positions, labels and size
Heatmap(
  mat,
  heatmap_legend_param = list(
    col_fun = col_fun,
    title = "Test",
    at = c(-2, 0, 1.9, 2),
    labels = c("neg_two", "zero", "any_posi(1.9)", "pos_two"),
    legend_height = unit(6, "cm"),
    grid_width = unit(1, "cm")
  )
)

## (2) Legend for discrete values
# 5 x 3 character matrix filled with random letters A-F
mat2 <- matrix(sample(LETTERS[1:6], 15, replace = TRUE),
               nrow = 5, byrow = TRUE,
               dimnames = list(c("Row-1", "Row-2", "Row-3", "Row-4", "Row-5"),
                               c("Col-1", "Col-2", "Col-3")))
# Named colour vector: one colour per possible letter
colors <- RColorBrewer::brewer.pal(n = 6, name = "Set1")
names(colors) <- LETTERS[1:6]
colors
# Legend laid out in two columns, filled row by row, with custom key sizes
Heatmap(mat2, col = colors,
        heatmap_legend_param = list(
          title = "AAA",
          at = LETTERS[1:6],
          labels = LETTERS[1:6],
          ncol = 2,
          by_row = TRUE,
          legend_gp = gpar(fill = colors),
          grid_height = unit(1, "cm"),
          grid_width = unit(5, "mm"),
          row_gap = unit(1, "mm")
          )
        )
image-20220612205624027