IOBR包肿瘤signature打分与免疫浸润分析

IOBR包集signature打分与免疫浸润分析为一体的肿瘤数据分析工具，由南方医科大学南方医院廖旺军教授，曾东强博士等人于2021年7月发表于Frontiers in Immunology，引用数以超过100余次。现根据其github教程学习其部分功能用法。

Paper：IOBR: Multi-Omics Immuno-Oncology Biological Research to Decode Tumor Microenvironment and Signatures

DOI：https://doi.org/10.3389/fimmu.2021.687975

Github：https://github.com/IOBR/IOBR

1、安装R包

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24


# options("repos"= c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
# options(BioC_mirror="http://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
depens<-c('tibble', 'survival', 'survminer', 'sva', 'limma', "DESeq2","devtools",
          'limSolve', 'GSVA', 'e1071', 'preprocessCore', 'ggplot2', "biomaRt",
          'ggpubr', "devtools", "tidyHeatmap", "caret", "glmnet", "ppcor","timeROC","pracma")
for(i in 1:length(depens)){
  depen<-depens[i]
  if (!requireNamespace(depen, quietly = TRUE)) BiocManager::install(depen)
}
if (!requireNamespace("remotes", quietly = TRUE)) install("remotes")
if (!requireNamespace("EPIC", quietly = TRUE))
  remotes::install_github("GfellerLab/EPIC", build_vignettes=TRUE)
if (!requireNamespace("MCPcounter", quietly = TRUE))
  remotes::install_github("ebecht/MCPcounter",ref="master", subdir="Source")
if (!requireNamespace("estimate", quietly = TRUE)){
  rforge <- "http://r-forge.r-project.org"
  install.packages("estimate", repos=rforge, dependencies=TRUE)
}

if (!requireNamespace("IOBR", quietly = TRUE))
  remotes::install_github("IOBR/IOBR",ref="master")

library(IOBR)

2、准备数据

对于RNAseq数据
- 作者建议使用UCSCXenaTools下载肿瘤表达数据，再结合IOBR函数进行TPM标准化（如下）；
- 之前学习了解过TCGAbiolinks，可以直接提取TPM数据，觉得更合适些。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13


# "https://gdc.xenahubs.net/download/TCGA-STAD.htseq_counts.tsv.gz"
eset_stad = data.table::fread("TCGA-STAD.htseq_counts.tsv.gz") %>% as.data.frame()
eset_stad$Ensembl_ID<-substring(eset_stad$Ensembl_ID, 1, 15)
eset_stad<-column_to_rownames(eset_stad, var = "Ensembl_ID")
# Revert back to original format because the data from UCSC was log2(x+1)transformed.
eset_stad<-(2^eset_stad)+1
eset_stad<-count2tpm(countMat = eset_stad, idType = "Ensembl", org="hsa")
eset_stad[1:4,1:4]
#        TCGA-D7-5577-01A TCGA-D7-6818-01A TCGA-BR-7958-01A TCGA-D7-8572-01A
# TSPAN6       33.4223214      16.91678635       24.2417334       20.2423671
# TNMD          0.1237997       0.06738584        0.1224305        0.1696897
# DPM1         15.9824956       5.35050514       11.1627196       16.3248532
# SCYL3         4.9644353       3.15739307        5.5790120        8.0491354

对于array数据
- 一般使用GEOquery包直接下载即可，关键步骤是根据芯片ID注释基因名
- IOBR提供了相关注释函数，仅需提供注释信息即可。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29


library("GEOquery")
# NOTE: This process may take a few minutes which depends on the internet connection speed. Please wait for its completion.
eset_geo<-getGEO(GEO     = "GSE100935",
                 getGPL  = F,
                 destdir = "./")
eset    <-eset_geo[[1]]
eset    <-exprs(eset)

head(anno_hug133plus2)
# # A tibble: 6 × 2
# probe_id  symbol 
# <fct>     <fct>  
#   1 1007_s_at MIR4640
# 2 1053_at   RFC2   
# 3 117_at    HSPA6  
# 4 121_at    PAX8   
# 5 1255_g_at GUCA1A 
# 6 1294_at   MIR5193
eset<-anno_eset(eset       = eset,
                annotation = anno_hug133plus2,
                symbol     = "symbol",
                probe      = "probe_id",
                method     = "mean")
eset[1:4, 1:4]
#              GSM2696792 GSM2696793 GSM2696794 GSM2696795
# SH3KBP1        14.30308   14.56398   14.37668   14.47983
# RPL41          14.36095   14.32783   14.33181   14.34614
# LOC101928826   14.18638   14.38247   14.34530   14.26433
# EEF1A1         14.13245   14.15141   14.08980   14.05879

3、signature打分

内置signature基因集（list对象）

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16


# (1)全部
str(signature_collection) # 264
dim(signature_collection_citation)

# (2)可分为3大类
str(signature_tme) #119

str(signature_metabolism) #114

str(signature_tumor) # 16

# (3) 又可分为40个主题
str(sig_group)
table(names(signature_collection) %in% unlist(sig_group))
# FALSE  TRUE 
#    22   242 

打分函数

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30


calculate_sig_score
sig_score <-calculate_sig_score(pdata           = NULL,
                                eset            = eset_stad,
                                signature       = signature_tme,
                                method          = "pca",
                                mini_gene_count = 2)
# pdata：样本表型，如无可忽略
# eset ：标准化表达矩阵
# signature ：list格式的基因集
## 内置包括  'signature_tme', 'signature_metabolism','signature_collection', 'go_bp','kegg','hallmark'

# method ：打分方法
## 可选  'pca', 'ssgsea', 'zscore','integration'（前3种都计算一遍）

# mini_gene_count：单个基因集包含的最小基因数
# parallel.size ： 调用线程数

sig_tme<-calculate_sig_score(pdata           = NULL,
                             eset            = eset_stad,
                             signature       = signature_tme,
                             method          = "pca",
                             mini_gene_count = 2)
sig_tme[1:4,1:4]
# # A tibble: 4 × 4
#   ID               CD_8_T_effector    DDR   APM
#   <chr>                      <dbl>  <dbl> <dbl>
# 1 TCGA-D7-5577-01A           5.00   1.63  2.82 
# 2 TCGA-D7-6818-01A          -2.45   0.199 0.373
# 3 TCGA-BR-7958-01A           6.38   3.58  3.34 
# 4 TCGA-D7-8572-01A           0.848 12.2   0.272

4、联合表型分析

在进行一系列signature打分之分后，可根据样本表型进行分组比较

若表型是二分类变量

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33


data("imvigor210_pdata")
imvigor210_pdata[1:5, 1:5]
pdata_group<-imvigor210_pdata[!imvigor210_pdata$BOR_binary=="NA",c("ID","BOR","BOR_binary")]
res<-iobr_cor_plot(pdata_group           = pdata_group,  #样本分组
                   id1                   = "ID",
                   group                 = "BOR_binary", 
                   is_target_continuous  = FALSE,
                   target                = NULL,

                   feature_data          = imvigor210_sig, #样本打分结果
                   id2                   = "ID",

                   category              = "signature",
                   signature_group       = sig_group[c(1)], # 一个主题的signature list

                   index                 = 1,                   #文件前缀名
                   ProjectID             = "IMvigor210",        #文件项目名
                   path                  = "1-BOR-relevant-signatures", #文件夹名

                   palette_box           = "paired1",
                   palette_corplot       = "pheatmap",
                   palette_heatmap       = 2
                   )
head(res)
# # A tibble: 6 × 8
#   sig_names                   p.value     NR     R statistic     p.adj log10pvalue stars
#   <chr>                         <dbl>  <dbl> <dbl>     <dbl>     <dbl>       <dbl> <fct>
# 1 Mismatch_Repair          0.00000921 -0.142 0.479    -0.620 0.0000608        5.04 **** 
# 2 Cell_cycle               0.0000105  -0.143 0.484    -0.627 0.0000608        4.98 **** 
# 3 DDR                      0.0000152  -0.140 0.474    -0.614 0.0000608        4.82 **** 
# 4 Homologous_recombination 0.0000331  -0.134 0.453    -0.587 0.0000893        4.48 **** 
# 5 Histones                 0.0000372  -0.127 0.429    -0.556 0.0000893        4.43 **** 
# 6 CellCycle_Reg            0.00134    -0.105 0.354    -0.459 0.00267          2.87 ** 

若表型是连续数值变量

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


res2 <-iobr_cor_plot(pdata_group           = pdata_group,
                   id1                   = "ID",
                   is_target_continuous  = TRUE,
                   target                = "HCP5",
                   group                 = "group3",

                   feature_data          = imvigor210_sig,
                   id2                   = "ID",

                   category              = "signature",
                   signature_group       = sig_group[c(1)],

                   padj_cutoff           = 1,
                   index                 = 2,
                   ProjectID             = "IMvigor210",
                   path                  = "3-HCP5-relevant-signatures",

                   palette_box           = "set2",
                   palette_corplot       = "pheatmap",
                   palette_heatmap       = 2
                   )

head(res2)
# # A tibble: 6 × 6
#   sig_names                                  p.value statistic    p.adj log10pvalue stars 
#   <chr>                                        <dbl>     <dbl>    <dbl>       <dbl> <fct> 
# 1 Positive_regulation_of_exosomal_secretion 2.80e-14   -0.393  3.36e-13      13.6   "****"
# 2 Ferroptosis                               1.02e- 7    0.281  6.11e- 7       6.99  "****"
# 3 EV_Cell_2020                              5.14e- 7    0.265  2.06e- 6       6.29  "****"
# 4 Nature_metabolism_Hypoxia                 1.36e- 4   -0.203  4.09e- 4       3.87  "***" 
# 5 CellCycle_Reg                             4.07e- 1    0.0446 9.07e- 1       0.391 "+"   
# 6 Homologous_recombination                  5.56e- 1   -0.0316 9.07e- 1       0.255 ""    

5、免疫浸润分析

IOBR包继承了8种免疫浸润分析的方法，使用方法如下

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15


cibersort<-deconvo_tme(eset = eset_stad[,1:10,], method = "cibersort", arrays = FALSE, perm = 200 )

epic<-deconvo_tme(eset = eset_stad[,1:10], method = "epic", arrays = FALSE)

mcp<-deconvo_tme(eset = eset_stad[,1:10], method = "mcpcounter")

xcell<-deconvo_tme(eset = eset_stad[,1:10], method = "xcell",arrays = FALSE)

estimate<-deconvo_tme(eset = eset_stad[,1:10], method = "estimate")

timer<-deconvo_tme(eset = eset_stad[,1:10], method = "timer", group_list = rep("stad",10))

quantiseq<-deconvo_tme(eset = eset_stad[,1:10], tumor = TRUE, arrays = FALSE, scale_mrna = TRUE, method = "quantiseq")

ips<-deconvo_tme(eset = eset_stad[,1:10], method = "ips", plot= FALSE)

结果可视化

1
2


library(tidyverse)
cell_bar_plot(input = epic, title = "Deconvolution Cell Fraction")

1、安装R包#

2、准备数据#

3、signature打分#

4、联合表型分析#

5、免疫浸润分析#

1、安装R包

2、准备数据

3、signature打分

4、联合表型分析

5、免疫浸润分析