基因-蛋白-化合物ID转换
# 1. Converting between gene ID types ----

## 1.1 The org.Hs.eg.db package ----
library(dplyr)
library(org.Hs.eg.db)

# List the ID types that can be used as `keytype` / `columns`.
keytypes(org.Hs.eg.db)
# [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS" "ENTREZID"
# [7] "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME" "GENETYPE" "GO"
# [13] "GOALL" "IPI" "MAP" "OMIM" "ONTOLOGY" "ONTOLOGYALL"
# [19] "PATH" "PFAM" "PMID" "PROSITE" "REFSEQ" "SYMBOL"
# [25] "UCSCKG" "UNIPROT"

gene_symbol <- c(
  "RHO", "CALM1", "MEG3", "GNGT1", "SAG",
  "RPGRIP1", "TRPM1", "PCP2", "PCP4", "AP1B1"
)

# `AnnotationDbi::select` is namespaced explicitly because dplyr also
# exports a `select`.
gene_ids <- AnnotationDbi::select(
  org.Hs.eg.db,
  keys = as.character(gene_symbol),
  columns = c("ENSEMBL", "ENTREZID"), # target ID types
  keytype = "SYMBOL"                  # ID type of `keys`
)
gene_ids

# De-duplicate: keep the first row for each ENTREZID.
gene_ids %>%
  dplyr::distinct(ENTREZID, .keep_all = TRUE)
#     SYMBOL         ENSEMBL ENTREZID
# 1      RHO ENSG00000163914     6010
# 2    CALM1 ENSG00000198668      801
# 3     MEG3 ENSG00000214548    55384
# 4    GNGT1 ENSG00000127928     2792
# 5      SAG ENSG00000130561     6295
# 6  RPGRIP1 ENSG00000092200    57096
# 7    TRPM1 ENSG00000134160     4308
# 8     PCP2 ENSG00000174788   126006
# 9     PCP4 ENSG00000183036     5121
# 10   AP1B1 ENSG00000100280      162

## 1.2 The biomaRt package ----
library(biomaRt)

ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
attributes <- listAttributes(ensembl)
attributes[1:5, ]

# Optional workaround kept from the original post; it turns off SSL peer
# verification, so enable it only if the connection cannot be made otherwise.
# library(httr)
# httr::set_config(config(ssl_verifypeer = 0L))

gene_symbol <- c(
  "RHO", "CALM1", "MEG3", "GNGT1", "SAG",
  "RPGRIP1", "TRPM1", "PCP2", "PCP4", "AP1B1"
)
gene_ids2 <- getBM(
  filters = "hgnc_symbol",
  attributes = c("hgnc_symbol", "ensembl_gene_id", "entrezgene_id"),
  values = gene_symbol,
  mart = ensembl
)
gene_ids2

# 2. Converting mouse gene symbols to human gene symbols ----
musGenes <- c("Hmmr", "Tlx3", "Cpeb4")

## Method 1: plain upper-casing ----
# Only a heuristic: it is right when the human symbol happens to be the
# upper-cased mouse symbol, as for these three genes.
toupper(musGenes)
# [1] "HMMR" "TLX3" "CPEB4"

## Method 2: the biomaRt package (unstable) ----
library(biomaRt)
# library(httr)
# httr::set_config(config(ssl_verifypeer = 0L))

# Both marts are pinned to the same Ensembl archive release.
human <- useMart(
  "ensembl",
  dataset = "hsapiens_gene_ensembl",
  host = "dec2021.archive.ensembl.org"
)
mouse <- useMart(
  "ensembl",
  dataset = "mmusculus_gene_ensembl",
  host = "dec2021.archive.ensembl.org"
)
genes <- getLDS(
  attributes = c("mgi_symbol"),
  filters = "mgi_symbol",
  values = musGenes,
  mart = mouse,
  attributesL = c("hgnc_symbol"),
  martL = human,
  uniqueRows = TRUE
)

## Method 3: the MGI database ----
# https://support.bioconductor.org/p/129636/
library(dplyr)

# Tab-separated homology report downloaded from MGI. The function below reads
# its columns `Symbol`, `Common.Organism.Name` and `DB.Class.Key`.
mouse_human_genes <- read.csv(
  "http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",
  sep = "\t"
)

#' Map mouse gene symbols to human gene symbols via a homology table
#'
#' For every symbol in `gene_list`, looks up the `DB.Class.Key` value(s) of
#' the rows whose organism is "mouse, laboratory", then collects the `Symbol`
#' of every "human" row that shares one of those keys.
#'
#' @param gene_list Character vector of mouse gene symbols.
#' @param homology Data frame with columns `Symbol`, `Common.Organism.Name`
#'   and `DB.Class.Key`. Defaults to the `mouse_human_genes` table loaded
#'   above, so existing one-argument calls keep working.
#' @return Character vector of human symbols, ordered by `gene_list` and then
#'   by row order in `homology`. Mouse symbols without a match contribute
#'   nothing; a mouse symbol with several human homologs contributes all of
#'   them. `character(0)` when nothing maps.
convert_mouse_to_human <- function(gene_list, homology = mouse_human_genes) {
  symbol <- as.character(homology[["Symbol"]])
  organism <- homology[["Common.Organism.Name"]]
  class_key <- homology[["DB.Class.Key"]]

  # Hoisted out of the per-gene lookup: these masks do not depend on the gene.
  is_mouse <- organism == "mouse, laboratory"
  is_human <- organism == "human"

  per_gene <- lapply(gene_list, function(gene) {
    # which() drops NA comparisons, so missing values never select a row.
    keys <- class_key[which(is_mouse & symbol == gene)]
    keys <- keys[!is.na(keys)]
    # %in% copes with zero, one or several keys; `==` would recycle silently.
    symbol[which(is_human & class_key %in% keys)]
  })

  human_symbols <- unlist(per_gene, use.names = FALSE)
  if (is.null(human_symbols)) character(0) else human_symbols
}

convert_mouse_to_human(musGenes)
# [1] "HMMR" "TLX3" "CPEB4"

# Commented-out recipe from the original post: build a genome-wide
# mouse -> human symbol table with biomaRt and save it as a CSV file.
# NOTE(review): `separate()` comes from tidyr, which this recipe never loads.
# NOTE(review): the first gsub() pattern looks like it was meant to be
#   'gene_name "' rather than the GTF file name - confirm before use.
# # https://github.com/lishensuo/utils
# # library("biomaRt")
# # library(httr)
# # httr::set_config(config(ssl_verifypeer = 0L))
# human = useMart("ensembl", dataset = "hsapiens_gene_ensembl",host = "dec2021.archive.ensembl.org")
# mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl",host = "dec2021.archive.ensembl.org")
#
# # https://www.gencodegenes.org/mouse/
# dat = data.table::fread("gencode.vM33.basic.annotation.gtf.gz")
# dat = subset(dat, V3 == "gene")
# dat_sub = dat[,"V9"] %>%
#   separate(V9, into = c("gene_id","gene_type","gene_name","mgi_id","havana_gene"), sep = "; ")
# dat_sub$gene_name2 = gsub('gencode.vM33.basic.annotation.gtf.gz "','',dat_sub$gene_name)
# dat_sub$gene_name2 = gsub('"','',dat_sub$gene_name2)
#
# genes = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol",
#                values = dat_sub$gene_name2,
#                mart = mouse,
#                attributesL = c("hgnc_symbol"),
#                martL = human, uniqueRows=T)
# write.csv(genes, file = "mgi2hgnc_biomart.csv", row.names = F, quote = F)
# head(genes)

# 3. Converting between protein and gene IDs ----
# https://www.uniprot.org/uploadlists/ ...