1、TCGAbiolinks下载数据 使用TCGAbiolinks下载特定肿瘤类型的SNV数据 https://bioconductor.org/packages/release/bioc/vignettes/TCGAbiolinks/inst/doc/mutation.html 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 library(TCGAbiolinks) query <- GDCquery( project = "TCGA-CHOL", data.category = "Simple Nucleotide Variation", access = "open", legacy = FALSE, data.type = "Masked Somatic Mutation") GDCdownload(query) maf <- GDCprepare(query) dim(maf) # [1] 3764 141 ## (1) 因后续需要,修改Tumor_Sample_Barcode列 maf$long_Barcode = maf$Tumor_Sample_Barcode maf$Tumor_Sample_Barcode = substr(maf$Tumor_Sample_Barcode,1,12) length(unique(maf$Tumor_Sample_Barcode)) # 51 ## (2) 读取临床生存数据 clinical = readxl::read_xlsx("TCGA_Pan_Cancer_Clinical_Data_mmc1.xlsx") clinical_sle = clinical %>% dplyr::filter(type=="CHOL") %>% dplyr::select(bcr_patient_barcode, OS, OS.time, clinical_stage) %>% dplyr::rename(Tumor_Sample_Barcode=bcr_patient_barcode) dim(clinical_sle) # 45 2、maftools可视化 https://bioconductor.org/packages/release/bioc/vignettes/maftools/inst/doc/maftools.html https://www.jieandze1314.com/post/cnposts/237/ 1 2 3 4 5 6 7 8 9 library(maftools) maf_obj = read.maf(maf = maf, clinicalData = clinical_sle) #每个样本的突变情况统计 getSampleSummary(maf_obj) #每个基因的突变类型统计 getGeneSummary(maf_obj) 2.1概括图 1 2 3 plotmafSummary(maf = maf_obj, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE) 2.2 基因瀑布图 1 2 3 4 5 6 7 8 9 10 11 12 13 oncoplot(maf = maf_obj, top = 10) # 如下图 ## 添加临床注释 oncoplot(maf = maf_obj, top = 10, clinicalFeatures = c('clinical_stage',"OS"), draw_titv = TRUE) ## 选择特定基因集 set.seed(42) genes = sample(maf_obj@data$Hugo_Symbol,10) oncoplot(maf = maf_obj, genes = genes, clinicalFeatures = c('clinical_stage',"OS"), draw_titv = TRUE) 2.3 转换颠倒统计 1 2 3 4 5 # Transition 转换 : 嘌呤(AG)或嘧啶(CT)内部之间转换 # Transversions 颠倒:嘌呤与嘧啶间互相转换 maf_obj.titv = titv(maf = maf_obj, plot = FALSE, useSyn = TRUE) plotTiTv(res = maf_obj.titv) 2.4 基因对突变统计 1 2 3 4 5 6 7 8 # green: co-occuring # yellow: mutually exclusive somaticInteractions(maf = maf_obj, top = 25, pvalue = c(0.05, 0.1)) set.seed(42) genes = sample(maf_obj@data$Hugo_Symbol,25) somaticInteractions(maf = maf_obj, genes = genes , pvalue = c(0.05, 0.1)) 2.5 生存分析 根据特定基因是否突变将病人分成WT与Mutant两组 1 2 3 4 5 6 7 8 9 10 11 mafSurvival(maf = maf_obj, genes = 'TP53', time = 'OS.time', Status = 'OS') # Group medianTime N # 1: Mutant 732 4 # 2: WT 650 41 ## 提取信息 # maf_obj@clinical.data %>% # dplyr::mutate(Group=ifelse(Tumor_Sample_Barcode %in% # subset(maf_obj@data, Hugo_Symbol=="TP53")$Barcode, # "Mutant","WT")) 2.6 基因对的生存相关性 1 2 3 4 5 6 7 8 9 10 prog_geneset = survGroup(maf = maf_obj, top = 200, geneSetSize = 2, time = "OS.time", Status = "OS", verbose = FALSE,minSamples = 3) prog_geneset # Gene_combination P_value hr WT Mutant # 1: PBRM1_PLXNA4 0.243 2.36e+00 42 3 # 2: PBRM1_PCLO 0.294 3.46e-01 42 3 # 3: PBRM1_TP53 0.320 3.64e-01 42 3 mafSurvGroup(maf = maf_obj, geneSet = c("PBRM1", "PLXNA4"), time = "OS.time", Status = "OS")