1、GO

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Fetch the GO annotation bundle for human (org.Hs.eg.db), all ontologies,
# with genes keyed by SYMBOL.
# NOTE(review): get_GO_data() is reached through `:::`, i.e. it is not an
# exported clusterProfiler function, so it may change between versions.
GO_data <- clusterProfiler:::get_GO_data("org.Hs.eg.db", "ALL", "SYMBOL")
names(GO_data)
# [1] "PATHID2NAME"  "EXTID2PATHID" "GO2ONT"       "PATHID2EXTID"

### (1) Member genes of each GO term
class(GO_data[["PATHID2EXTID"]])
#[1] "list"
head(GO_data[["PATHID2EXTID"]], 1)
# $`GO:0000002`
# [1] "PARP1"    "SLC25A4"  "DNA2"     "TYMP"     "LIG3"     "MEF2A"   
# [7] "MPV17"    "OPA1"     "TOP3A"    "TP53"     "LONP1"    "AKT3"    
# [13] "PPARGC1A" "POLG2"    "SLC25A36" "PIF1"     "SESN2"    "SLC25A33"
# [19] "MGME1"    "PRIMPOL"  "STOX1"  

### (2) GO terms each gene is annotated to
class(GO_data[["EXTID2PATHID"]])
#[1] "list"
head(GO_data[["EXTID2PATHID"]], 1)
# $A1BG
# [1] "GO:0001775" "GO:0002252" "GO:0002263" "GO:0002274" "GO:0002275" "GO:0002283" "GO:0002366"
# [8] "GO:0002376" "GO:0002443" "GO:0002444" "GO:0002446" "GO:0002576" "GO:0003674" "GO:0005575"
# ...

### (3) Name of each GO term
class(GO_data[["PATHID2NAME"]])
#[1] "character"
head(GO_data[["PATHID2NAME"]], 1)
# GO:0000001 
# "mitochondrion inheritance"

### (4) Ontology (BP / CC / MF) of each GO term
class(GO_data[["GO2ONT"]])
#[1] "character"
head(GO_data[["GO2ONT"]], 1)
# GO:0000002 
# "BP"
# Number of GO terms per ontology
table(GO_data[["GO2ONT"]])
# BP    CC    MF 
# 16013  1981  4755 

library(tidyverse)

# Convert the two named character vectors (GO id -> term name and
# GO id -> ontology) into two-column data frames; the vector names become
# the "ID" column.
go_name <- reshape2::melt(GO_data$PATHID2NAME) %>%
  rownames_to_column("ID") %>%
  dplyr::rename("Name" = "value")
go_type <- reshape2::melt(GO_data$GO2ONT) %>%
  rownames_to_column("ID") %>%
  dplyr::rename("Type" = "value")

# Join explicitly on the GO id instead of relying on the implicit
# "all shared columns" natural join (which also prints a message).
# GSEA is an upper-case, underscore-separated label of the form
# GO<ontology>_<term name>.
# NOTE(review): only spaces are replaced, so "-" and "," survive in the label
# (see rows 3, 4 and 6 of the output below) -- confirm this matches the naming
# of the gene-set collection being compared against.
go_info <- inner_join(go_name, go_type, by = "ID") %>%
  dplyr::mutate(
    GSEA = toupper(gsub(" ", "_", paste0("GO", Type, " ", Name)))
  )
dim(go_info)
head(go_info)
          # ID                                               Name Type
# 1 GO:0000002                   mitochondrial genome maintenance   BP
# 2 GO:0000003                                       reproduction   BP
# 3 GO:0000009             alpha-1,6-mannosyltransferase activity   MF
# 4 GO:0000010          trans-hexaprenyltranstransferase activity   MF
# 5 GO:0000012                         single strand break repair   BP
# 6 GO:0000014 single-stranded DNA endodeoxyribonuclease activity   MF
                                                     # GSEA
# 1                   GOBP_MITOCHONDRIAL_GENOME_MAINTENANCE
# 2                                       GOBP_REPRODUCTION
# 3             GOMF_ALPHA-1,6-MANNOSYLTRANSFERASE_ACTIVITY
# 4          GOMF_TRANS-HEXAPRENYLTRANSTRANSFERASE_ACTIVITY
# 5                         GOBP_SINGLE_STRAND_BREAK_REPAIR
# 6 GOMF_SINGLE-STRANDED_DNA_ENDODEOXYRIBONUCLEASE_ACTIVITY

# Count how many row names of an enrichment result appear among the labels.
# NOTE(review): brca_enrich_kegg is not defined in this snippet; it is
# presumably an enrichment result whose row names are gene-set names --
# confirm before running.
table(rownames(brca_enrich_kegg) %in% go_info$GSEA)
  • 此外，GO.db 包也提供了除组成基因以外的 GO 注释信息
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
library(GO.db)
# Key types that can be used to query GO.db
keytypes(GO.db)
# [1] "DEFINITION" "GOID"       "ONTOLOGY"   "TERM" 

# First three GO ids known to GO.db
goids <- keys(GO.db, keytype = "GOID")[1:3]
# [1] "GO:0000001" "GO:0000002" "GO:0000003"

# Term name, ontology and definition for those ids
# (DEFINITION is the detailed description of the term).
goids_anno <- AnnotationDbi::select(
  GO.db,
  keys = goids,
  columns = c("TERM", "ONTOLOGY", "DEFINITION"),
  keytype = "GOID"
)

# GO ids of all BP terms.
# select() is namespace-qualified here as well: a bare select() resolves to
# dplyr::select() whenever dplyr/tidyverse is attached after GO.db, which
# fails on an annotation database object.
goBP <- AnnotationDbi::select(
  GO.db,
  keys = "BP",
  columns = c("GOID"),
  keytype = "ONTOLOGY"
)

2、KEGG

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
# Download the KEGG pathway annotation for human ("hsa").
# This call needs network access to KEGG.
hsa_kegg <- clusterProfiler::download_KEGG("hsa")
names(hsa_kegg)
# [1] "KEGGPATHID2EXTID" "KEGGPATHID2NAME"

### KEGG pathway id -> pathway name
# The original inspected an undefined object `z`; the table printed below is
# the KEGGPATHID2NAME element of the downloaded list.
head(hsa_kegg$KEGGPATHID2NAME)
#       from                                       to
# 1 hsa00010             Glycolysis / Gluconeogenesis
# 2 hsa00020                Citrate cycle (TCA cycle)
# 3 hsa00030                Pentose phosphate pathway

### KEGG pathway id -> member genes
head(hsa_kegg$KEGGPATHID2EXTID)
#       from    to
# 1 hsa00010 10327
# 2 hsa00010   124
# 3 hsa00010   125