1、GSVA函数

1
2
3
4
5
6
7
# BiocManager::install("GSVA")
library(GSVA)
?gsva
gsva(expr = ,   #metrix格式表达矩阵(行--基因,列--样本)
     gset.idx.list = , #list格式基因集
     method=c("gsva", "ssgsea", "zscore", "plage"),  # defaul:gsva
     kcdf=c("Gaussian", "Poisson", "none"))  # default:Gaussian

如上所示,gsva()函数有4个关键参数。

  • 前两个参数分别提供表达矩阵与基因集,其中基因名需要为相同的格式。
  • method参数设置方法,默认为gsva
  • kcdf参数设置分布模拟类型,默认为Gaussian,适合于array数据以及log标准化(CPM/RPKM/TPM等)的RNA-seq数据。Poisson分布适合RNA-seq的原始count矩阵。

2、示例数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
## (1) 表达矩阵
# BiocManager::install("GSVAdata")
library(GSVAdata)
data(commonPickrellHuang)

### array矩阵
array_mt = exprs(huangArrayRMAnoBatchCommon_eset)
dim(array_mt)
# [1] 11508    36
array_mt[1:4,1:4]
#        NA19099  NA18523  NA19144  NA19137
# 8567  8.370526 8.544890 8.270395 8.585984
# 23139 7.536228 7.232781 7.469420 7.801986
# 7580  6.325102 6.405961 6.510249 6.285510
# 55619 9.134495 9.049938 9.704679 9.285202

### count矩阵
count_mt = exprs(pickrellCountsArgonneCQNcommon_eset)
dim(count_mt)
# [1] 11508    36
count_mt[1:4,1:4]
#       NA19099 NA18523 NA19144 NA19137
# 8567      326     209     318     343
# 23139     255     169     245     361
# 7580       72      69     124      76
# 55619     487     590     678     540

## (2) 基因集
library(tidyverse)
library(msigdbr)
hm_H = msigdbr(species = "Homo sapiens",
               category = "H",
               subcategory = NULL) %>% as.data.frame() %>% 
  dplyr::select(gs_cat, gs_subcat, gs_name, entrez_gene)
hm_H.list = split(hm_H$entrez_gene, hm_H$gs_name)
str(head(hm_H.list))
# List of 6
# $ HALLMARK_ADIPOGENESIS       : int [1:210] 19 11194 10449 33 34 35 47 50 51 112 ...
# $ HALLMARK_ALLOGRAFT_REJECTION: int [1:335] 16 6059 10006 43 92 207 322 567 567 586 ...
# $ HALLMARK_ANDROGEN_RESPONSE  : int [1:102] 10257 11057 2181 87 9510 11047 9590 207 220 56172 ...
# $ HALLMARK_ANGIOGENESIS       : int [1:36] 350 351 894 1281 1290 6372 2260 11167 3685 182 ...
# $ HALLMARK_APICAL_JUNCTION    : int [1:231] 58 60 70 71 72 87 88 89 81 81 ...
# $ HALLMARK_APICAL_SURFACE     : int [1:46] 102 79602 79602 84632 9465 351 50617 5205 2683 672 ...

3、GSVA分析

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
## (1) array + gsva
res_gsva = gsva(array_mt, hm_H.list)
class(res_gsva)
# [1] "matrix" "array"
dim(res_gsva)
# [1] 50 36
res_gsva[1:4,1:4]
#                                  NA19099     NA18523      NA19144    NA19137
# HALLMARK_ADIPOGENESIS        -0.21147644  0.08157479  0.004672524 -0.1656925
# HALLMARK_ALLOGRAFT_REJECTION -0.32620137 -0.44105423 -0.452996166 -0.4288909
# HALLMARK_ANDROGEN_RESPONSE    0.02056377  0.09123573 -0.135386552 -0.2198959
# HALLMARK_ANGIOGENESIS        -0.02435541 -0.18282244  0.281235763  0.5114103

## (2) count + ssgsea
res_gsva = gsva(count_mt, hm_H.list, 
                method = "ssgsea",
                kcdf = "Poisson")
class(res_gsva)
# [1] "matrix" "array"
dim(res_gsva)
# [1] 50 36
res_gsva[1:4,1:4]
#                                NA19099    NA18523   NA19144   NA19137
# HALLMARK_ADIPOGENESIS        0.3288768 0.32899026 0.2893928 0.3297140
# HALLMARK_ALLOGRAFT_REJECTION 0.5018248 0.49183093 0.4672342 0.4531009
# HALLMARK_ANDROGEN_RESPONSE   0.3210905 0.28289473 0.2818011 0.2845855
# HALLMARK_ANGIOGENESIS        0.1794361 0.07705857 0.2076328 0.1276449

4、limma差异分析

  • 以上面array + gsva的分析结果为例,随机模拟分组进行limma差异分析
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
## 模拟分组
meta = data.frame(sample=colnames(array_mt),
                  group=rep(c("G1","G2"), each=18))
head(meta)
#    sample group
# 1 NA19099    G1
# 2 NA18523    G1
# 3 NA19144    G1
# 4 NA19137    G1
# 5 NA18861    G1
# 6 NA19116    G1

## limma差异分析
library(limma)
mod <- model.matrix(~ factor(meta$group))
colnames(mod) <- c("G1", "G2")
fit <- lmFit(res_gsva, mod)
fit <- eBayes(fit)
tt <- topTable(fit, coef=2, n=Inf)
head(tt)
#                                        logFC     AveExpr         t    P.Value adj.P.Val         B
# HALLMARK_COMPLEMENT               0.08684907 -0.10731154  2.495909 0.01676849 0.3324198 -3.059104
# HALLMARK_HEME_METABOLISM          0.07942485 -0.04250442  2.238926 0.03076698 0.3324198 -3.486635
# HALLMARK_IL2_STAT5_SIGNALING      0.07839683 -0.09276742  2.193525 0.03411795 0.3324198 -3.558694
# HALLMARK_MITOTIC_SPINDLE         -0.10721385 -0.07656961 -2.057416 0.04617883 0.3324198 -3.768076
# HALLMARK_IL6_JAK_STAT3_SIGNALING  0.09342746 -0.20104143  2.026107 0.04943145 0.3324198 -3.814790
# HALLMARK_INFLAMMATORY_RESPONSE    0.10211588 -0.10571870  1.996939 0.05263925 0.3324198 -3.857810