# Download GO & KEGG pathway gene sets with clusterProfiler
#
# Tutorial script. Each call is followed by example console output, kept as
# comments. The exact values depend on the installed annotation package
# versions and on the current KEGG release.

# 1. GO ----

# get_GO_data() is reached through `:::`, i.e. it is not part of
# clusterProfiler's exported API and may change between package versions.
GO_data <- clusterProfiler:::get_GO_data("org.Hs.eg.db", "ALL", "SYMBOL")
names(GO_data)
# [1] "PATHID2NAME"  "EXTID2PATHID" "GO2ONT"       "PATHID2EXTID"

### (1) Genes that make up each GO term
class(GO_data$PATHID2EXTID)
# [1] "list"
GO_data$PATHID2EXTID[1]
# $`GO:0000002`
#  [1] "PARP1"    "SLC25A4"  "DNA2"     "TYMP"     "LIG3"     "MEF2A"
#  [7] "MPV17"    "OPA1"     "TOP3A"    "TP53"     "LONP1"    "AKT3"
# [13] "PPARGC1A" "POLG2"    "SLC25A36" "PIF1"     "SESN2"    "SLC25A33"
# [19] "MGME1"    "PRIMPOL"  "STOX1"

### (2) GO terms each gene is involved in
class(GO_data$EXTID2PATHID)
# [1] "list"
GO_data$EXTID2PATHID[1]
# $A1BG
# [1] "GO:0001775" "GO:0002252" "GO:0002263" "GO:0002274" "GO:0002275" "GO:0002283" "GO:0002366"
# [8] "GO:0002376" "GO:0002443" "GO:0002444" "GO:0002446" "GO:0002576" "GO:0003674" "GO:0005575"
# ...

### (3) GO term names
class(GO_data$PATHID2NAME)
# [1] "character"
GO_data$PATHID2NAME[1]
#                  GO:0000001
# "mitochondrion inheritance"

### (4) GO term ontology (category)
class(GO_data$GO2ONT)
# [1] "character"
GO_data$GO2ONT[1]
# GO:0000002
#       "BP"
table(GO_data$GO2ONT)
#    BP    CC    MF
# 16013  1981  4755

library(tidyverse)

# Turn the two named character vectors into (ID, value) data frames; the
# vector names (GO IDs) arrive as row names and are moved into an ID column.
go_name <- reshape2::melt(GO_data$PATHID2NAME) %>%
  rownames_to_column("ID") %>%
  dplyr::rename("Name" = "value")
go_type <- reshape2::melt(GO_data$GO2ONT) %>%
  rownames_to_column("ID") %>%
  dplyr::rename("Type" = "value")

# "ID" is the only column the two tables share; naming it explicitly avoids
# the implicit-key message and guards against accidental extra join keys.
# GSEA is an upper-case, underscore-separated name such as
# GOBP_MITOCHONDRIAL_GENOME_MAINTENANCE.
# NOTE(review): only spaces are replaced; hyphens and commas are kept (see the
# example output below). Confirm this matches the naming of the gene-set
# collection being compared against.
go_info <- inner_join(go_name, go_type, by = "ID") %>%
  dplyr::mutate(
    GSEA = toupper(gsub(" ", "_", paste0("GO", Type, " ", Name)))
  )
dim(go_info)
head(go_info)
#           ID                                               Name Type
# 1 GO:0000002                   mitochondrial genome maintenance   BP
# 2 GO:0000003                                       reproduction   BP
# 3 GO:0000009             alpha-1,6-mannosyltransferase activity   MF
# 4 GO:0000010          trans-hexaprenyltranstransferase activity   MF
# 5 GO:0000012                         single strand break repair   BP
# 6 GO:0000014 single-stranded DNA endodeoxyribonuclease activity   MF
#                                                      GSEA
# 1                   GOBP_MITOCHONDRIAL_GENOME_MAINTENANCE
# 2                                       GOBP_REPRODUCTION
# 3             GOMF_ALPHA-1,6-MANNOSYLTRANSFERASE_ACTIVITY
# 4          GOMF_TRANS-HEXAPRENYLTRANSTRANSFERASE_ACTIVITY
# 5                         GOBP_SINGLE_STRAND_BREAK_REPAIR
# 6 GOMF_SINGLE-STRANDED_DNA_ENDODEOXYRIBONUCLEASE_ACTIVITY

# brca_enrich_kegg is not created anywhere in this script (presumably an
# enrichment result from a separate analysis whose row names are gene-set
# names -- TODO confirm). Only run the overlap check when it is available.
if (exists("brca_enrich_kegg")) {
  table(rownames(brca_enrich_kegg) %in% go_info$GSEA)
}

# In addition, the GO.db package provides GO annotation other than the member
# genes of each term.
library(GO.db)
keytypes(GO.db)
# [1] "DEFINITION" "GOID"       "ONTOLOGY"   "TERM"
goids <- keys(GO.db, keytype = "GOID")[1:3]
# [1] "GO:0000001" "GO:0000002" "GO:0000003"

# select() is namespaced because dplyr (attached via tidyverse) also exports
# a select(); which one a bare call resolves to depends on attach order.
goids_anno <- AnnotationDbi::select(
  GO.db,
  keys = goids,
  # DEFINITION is the detailed description of the term
  columns = c("TERM", "ONTOLOGY", "DEFINITION"),
  keytype = "GOID"
)

# GO IDs of all BP terms
goBP <- AnnotationDbi::select(
  GO.db,
  keys = "BP",
  columns = c("GOID"),
  keytype = "ONTOLOGY"
)

# 2. KEGG ----

# Requires network access: the pathway tables are downloaded from KEGG.
hsa_kegg <- clusterProfiler::download_KEGG("hsa")
names(hsa_kegg)
# [1] "KEGGPATHID2EXTID" "KEGGPATHID2NAME"

### KEGG pathway ID and name
head(hsa_kegg$KEGGPATHID2NAME)
#       from                           to
# 1 hsa00010 Glycolysis / Gluconeogenesis
# 2 hsa00020    Citrate cycle (TCA cycle)
# 3 hsa00030    Pentose phosphate pathway

### Genes that make up each KEGG pathway
head(hsa_kegg$KEGGPATHID2EXTID)
#       from    to
# 1 hsa00010 10327
# 2 hsa00010   124
# 3 hsa00010   125