0、示例数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
library(clusterProfiler)
library(org.Hs.eg.db)

data(geneList, package="DOSE")
str(geneList)
# Named num [1:12495] 4.57 4.51 4.42 4.14 3.88 ...
# - attr(*, "names")= chr [1:12495] "4312" "8318" "10874" "55143" ...
gene_ids<-AnnotationDbi::select(org.Hs.eg.db, keys=as.character(names(gene_list)), 
                                columns="SYMBOL", #目标格式
                                keytype="ENTREZID") #目前的格式
# 'select()' returned 1:1 mapping between keys and columns
## ORA分析
gene_deg = names(geneList)[abs(geneList) > 2]
ego <- enrichGO(gene          = gene_deg,
                OrgDb         = org.Hs.eg.db,
                ont           = "CC", # "BP","MF","ALL"
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05,
                readable      = TRUE)
## GSEA分析
gseago <- gseGO(geneList     = geneList,
                OrgDb        = org.Hs.eg.db,
                ont          = "CC",
                minGSSize    = 100,
                maxGSSize    = 500,
                pvalueCutoff = 0.05,
                verbose      = FALSE)
gseago = setReadable(gseago, OrgDb = org.Hs.eg.db)

1、ggplot2

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
## ORA
library(tidyverse)
ego_df = ego@result %>% head(10) %>% 
  dplyr::mutate(logp = -log10(p.adjust)) %>% 
  dplyr::arrange(logp) %>% 
  dplyr::mutate(Label=str_wrap(gsub("_"," ",Description), width = 50)) %>% 
  dplyr::mutate(Label = factor(Label, levels=Label)) %>% 
  tibble::remove_rownames() %>% 
  dplyr::select(Label, logp)
head(ego_df)
#                                      Label     logp
# 1           microtubule associated complex 5.419514
# 2                          spindle midzone 5.681085
# 3                      spindle microtubule 7.341138

ggplot(ego_df, aes(x=Label, y=logp)) + 
  geom_bar(stat="identity", fill="#1b9e77") + 
  xlab("Pathway names") +
  ylab("-log10(P.adj)") +
  scale_y_continuous(expand=c(0,0)) + 
  coord_flip() + 
  xlab("GO Pathway") + 
  ylab("Adjusted P value(-log10)") + 
  geom_text(aes(label = Label, y=0.2), hjust = 0, size=5) +
  theme_classic() +
  theme(axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.y = element_text(size = 25),
        axis.title.x = element_text(size = 20),
        axis.text.x = element_text(size = 16))
image-20230114200216781
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
## GSEA
library(tidyverse)
gseago_df = gseago@result %>% head(20) %>% 
  dplyr::mutate(logp = -log10(p.adjust)) %>% 
  dplyr::arrange(logp) %>% 
  dplyr::mutate(Label=str_wrap(gsub("_"," ",Description), width = 50)) %>% 
  dplyr::mutate(Label = factor(Label, levels=Label)) %>% 
  tibble::remove_rownames() %>% 
  dplyr::select(Label, logp, NES)
head(gseago_df)
#                            Label     logp      NES
# 1   polymeric cytoskeletal fiber 3.755730 1.487284
# 2   chromosome, telomeric region 4.311816 1.907511
# 3 microtubule associated complex 4.432327 1.944046
ggplot(gseago_df, 
       aes(NES, fct_reorder(Label, NES), fill=qvalue)) + 
  geom_bar(stat='identity') + 
  scale_fill_continuous(low='red', high='blue', 
                        guide=guide_colorbar(reverse=TRUE)) + 
  theme_minimal() + ylab(NULL)
image-20230114200540657

2、GOplot

  • 官方教程:https://wencke.github.io/
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# install.packages('GOplot')
library(GOplot)
# (1) 准备数据
## 富集通路结果
res_enrich = ego@result %>% 
  dplyr::select(ID, Description, geneID,p.adjust) %>% 
  dplyr::mutate(Category = "GO", .before=1) %>%  
  dplyr::mutate(geneID = gsub("/",", ", geneID)) %>% 
  dplyr::rename("Term"="Description", "Genes"="geneID", "adj_pval"="p.adjust") %>%
  dplyr::select(Category,ID,Term,Genes,adj_pval) %>% 
  tibble::remove_rownames()
t(res_enrich[1,])
#           1                                                                                                                                                                                   
# Category "GO"                                                                                                                                                                                
# ID       "GO:0005819"                                                                                                                                                                        
# Term     "spindle"                                                                                                                                                                           
# Genes    "CDCA8, CDC20, KIF23, CENPE, ASPM, DLGAP5, SKA1, NUSAP1, TPX2, TACC3, NEK2, CDK1, MAD2L1, KIF18A, BIRC5, KIF11, TRAT1, TTK, AURKB, PRC1, KIFC1, KIF18B, KIF20A, AURKA, CCNB1, KIF4A"
# adj_pval "6.339976e-11"
## 如上,ID为optional,Category为required

## (2) 基因差异倍数
res_logfc = data.frame(ID=gene_ids$SYMBOL,
                       logFC=geneList)
head(res_logfc)
#      ID    logFC
# 1  MMP1 4.572613
# 2 CDC45 4.514594
# 3   NMU 4.418218
## 如无相关数据,可随便设置为0

## (3) 构建绘图对象
circ <- circle_dat(res_enrich, res_logfc)
head(circ)
  • Circular visualization

    此种绘图方式,ID列为required

1
2
GOCircle(circ, nsub = 10)
# GOCircle(circ, nsub = 5)
image-20230114202946549
  • Chord visualization
1
2
3
4
5
6
7
8
## 展示指定通路
pw_sle = ego@result$Description[1:4]
## 展示指定基因
gene_sle = res_logfc
## 构建绘图对象
gene_sle = res_logfc
# chord <- chord_dat(data = circ, genes = gene_sle)
# chord <- chord_dat(data = circ, process = pw_sle)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
## 含有差异倍数信息情况
GOChord(chord, 
        limit = c(0, 0), 
        # 第1个数:基因至少富集到0条通路
        # 第2个数:基因至少富集到0条展示通路
        space = 0.02, #基因矩形的间距
        gene.order = 'none', 
        process.label = 10,  #legend字体大小
        lfc.col = c("red","gray","blue"),  #映射差异倍数的颜色
        ribbon.col=brewer.pal(length(pw_sle), "Set1"), #通路条带颜色
        gene.space = 0.25, #基因名至圆的距离
        gene.size = 3  # 基因名的大小
        )
image-20230114203752900
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
## 忽略差异倍数信息情况
GOChord(chord[,-1*ncol(chord)], 
        nlfc = 0,
        limit = c(3, 3), 
        # 第1个数:基因至少富集到3条通路
        # 第2个数:基因至少富集到3条展示通路
        space = 0.02, #基因矩形的间距
        gene.order = 'none', 
        process.label = 10,  #legend字体大小
        lfc.col = "red",  #映射差异倍数的颜色
        ribbon.col=brewer.pal(length(pw_sle), "Set1"), #通路条带颜色
        gene.space = 0.25, #基因名至圆的距离
        gene.size = 3  # 基因名的大小
)
image-20230114203917215

3、aPEAR

1
2
3
4
5
library(tidyverse)
library(clusterProfiler)
library(DOSE)
library(org.Hs.eg.db)
library(aPEAR)

该包的一个核心函数是enrichmentNetwork(),其需要的主要参数是表示富集分析的数据框。需要包含如下几列

  • Description:描述通路的含义;

  • pathwayGenes:通路的组成基因。

    it will be used to calculate the similarities between the pathways. It can be leading edge genes or the full gene list. The ID type (Ensembl, Gene symbol, etc.) does not matter but should be the same between all the pathways. The genes should be separated by “/”.

  • 通路富集程度的列,可以为P值;对于GSEA结果也可以是NES;

  • 通路基因数目的列。

1
2
3
4
5
6
data(geneList)
enrich <- gseGO(geneList, OrgDb = org.Hs.eg.db, ont = 'CC')
enrich_df = enrich@result

enrich_df2plot = enrich_df[,c("Description","core_enrichment","NES","setSize")]
colnames(enrich_df2plot) = c("Description","pathwayGenes","NES","Size")

此外还有其它参数可以调整,包括聚类、颜色等

  • 计算通路相似度方法 simMethod = c(“jaccard”, “cosine”, “cor”)
  • 聚类算法 clustMethod = c(“markov”, “hier”, “spectral”)
  • 类别名 clustNameMethod = c(“pagerank”, “hits”, “none”)
  • 筛选类-每类包含的最小通路数目 minClusterSize = 2
  • 认为类别内/间,通路相似的阈值 innerCutoff/outerCutoff
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
set.seed(123) # 如果当前网络排版不理想,可以调整seed
net.list= enrichmentNetwork(enrich_df2plot, 
                            simMethod = "jaccard", 
                            clustMethod = "markov",
                            clustNameMethod = "pagerank",
                            
                            minClusterSize = 5,
                            innerCutoff = 0.1,
                            outerCutoff = 0.5,
                            drawEllipses = FALSE,
                            fontSize = 3,
                            repelLabels = FALSE,
                            
                            colorBy = "NES",
                            colorType = "nes", # c("nes", "pval")
                            pCutoff = -10,     # when colorType = "pval"
                            nodeSize = "Size",
                            
                            plotOnly = FALSE)

net_cluster = net.list$clusters
table(net_cluster$Cluster)
#                        ciliary plasm condensed chromosome, centromeric region 
#                                    6                                        6 
#     external encapsulating structure                          mitotic spindle 
#                                    5                                       12 
# postsynaptic specialization membrane                            vesicle lumen 
#                                    5                                        6 
net.list$plot

image-20230617100401078