1、TCGAbiolinks下载数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
library(TCGAbiolinks)
query <- GDCquery(
    project = "TCGA-CHOL", 
    data.category = "Simple Nucleotide Variation", 
    access = "open", 
    legacy = FALSE, 
    data.type = "Masked Somatic Mutation")
GDCdownload(query)
maf <- GDCprepare(query)
dim(maf)
# [1] 3764  141

## (1) 因后续需要,修改Tumor_Sample_Barcode列
maf$long_Barcode = maf$Tumor_Sample_Barcode 
maf$Tumor_Sample_Barcode = substr(maf$Tumor_Sample_Barcode,1,12)
length(unique(maf$Tumor_Sample_Barcode))
# 51

## (2) 读取临床生存数据
clinical = readxl::read_xlsx("TCGA_Pan_Cancer_Clinical_Data_mmc1.xlsx")
clinical_sle = clinical %>% 
	dplyr::filter(type=="CHOL")  %>% 
	dplyr::select(bcr_patient_barcode, OS, OS.time, clinical_stage) %>% 
	dplyr::rename(Tumor_Sample_Barcode=bcr_patient_barcode)
dim(clinical_sle)
# 45 

2、maftools可视化

1
2
3
4
5
6
7
8
9
library(maftools)

maf_obj = read.maf(maf = maf, clinicalData = clinical_sle)

#每个样本的突变情况统计
getSampleSummary(maf_obj)

#每个基因的突变类型统计
getGeneSummary(maf_obj)

2.1概括图

1
2
3
plotmafSummary(maf = maf_obj, rmOutlier = TRUE, 
			   addStat = 'median', dashboard = TRUE, 
			   titvRaw = FALSE)
image-20230409170234902

2.2 基因瀑布图

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
oncoplot(maf = maf_obj, top = 10)  # 如下图
## 添加临床注释

oncoplot(maf = maf_obj, top = 10,
	  	 clinicalFeatures = c('clinical_stage',"OS"),
	  	 draw_titv = TRUE)

## 选择特定基因集
set.seed(42)
genes = sample(maf_obj@data$Hugo_Symbol,10)
oncoplot(maf = maf_obj, genes = genes,
	  	 clinicalFeatures = c('clinical_stage',"OS"),
	  	 draw_titv = TRUE)
image-20230409170615418

2.3 转换颠倒统计

1
2
3
4
5
# Transition 转换 : 嘌呤(AG)或嘧啶(CT)内部之间转换
# Transversions 颠倒:嘌呤与嘧啶间互相转换

maf_obj.titv = titv(maf = maf_obj, plot = FALSE, useSyn = TRUE)
plotTiTv(res = maf_obj.titv)
image-20230409171216097

2.4 基因对突变统计

1
2
3
4
5
6
7
8
# green: co-occuring
# yellow: mutually exclusive

somaticInteractions(maf = maf_obj, top = 25, pvalue = c(0.05, 0.1))

set.seed(42)
genes = sample(maf_obj@data$Hugo_Symbol,25)
somaticInteractions(maf = maf_obj, genes  = genes , pvalue = c(0.05, 0.1))
image-20230409171336514

2.5 生存分析

  • 根据特定基因是否突变将病人分成WT与Mutant两组
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
mafSurvival(maf = maf_obj, genes = 'TP53', 
			time = 'OS.time', Status = 'OS')
#     Group medianTime  N
# 1: Mutant        732  4
# 2:     WT        650 41

## 提取信息
# maf_obj@clinical.data %>% 
# 	dplyr::mutate(Group=ifelse(Tumor_Sample_Barcode %in% 
# 		subset(maf_obj@data, Hugo_Symbol=="TP53")$Barcode,
# 		"Mutant","WT")) 
image-20230409171710303

2.6 基因对的生存相关性

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
prog_geneset = survGroup(maf = maf_obj, top = 200, geneSetSize = 2, 
						 time = "OS.time", Status = "OS", 
						 verbose = FALSE,minSamples = 3)
prog_geneset
#    Gene_combination P_value       hr WT Mutant
# 1:     PBRM1_PLXNA4   0.243 2.36e+00 42      3
# 2:       PBRM1_PCLO   0.294 3.46e-01 42      3
# 3:       PBRM1_TP53   0.320 3.64e-01 42      3
mafSurvGroup(maf = maf_obj, geneSet = c("PBRM1", "PLXNA4"), 
			 time = "OS.time", Status = "OS")
image-20230409171856663