1.功能注释
2.功能富集
代码语言:javascript复制rm(list = ls())
options(stringsAsFactors = F)
library(clusterProfiler)
library(org.Hs.eg.db)
library(GSEABase)
library(ggplot2)
library(tidyverse)
## Error in download.KEGG.Path(species)
# https://github.com/YuLab-SMU/clusterProfiler/pull/471
getOption("clusterProfiler.download.method")
#R.utils::setOption("clusterProfiler.download.method",'auto')
options(clusterProfiler.download.method = "wininet") #windows系统用winiet
#options(clusterProfiler.download.method = "wget")
getOption("clusterProfiler.download.method")
# 读取差异分析结果
load(file = "data/Step03-edgeR_nrDEG.Rdata")
ls()
# 提取所有差异表达的基因名
DEG <- DEG_edgeR_symbol[DEG_edgeR_symbol$regulated!="normal",2]
DEG <- as.character(na.omit(DEG))
head(DEG)
## ===GO数据库, 输出所有结果,后续可根据pvalue挑选结果
ego_CC <- enrichGO(gene=DEG, OrgDb= 'org.Hs.eg.db', keyType='SYMBOL', ont="CC", pvalueCutoff= 1,qvalueCutoff= 1)
ego_MF <- enrichGO(gene=DEG, OrgDb= 'org.Hs.eg.db', keyType='SYMBOL', ont="MF", pvalueCutoff= 1,qvalueCutoff= 1)
ego_BP <- enrichGO(gene=DEG, OrgDb= 'org.Hs.eg.db', keyType='SYMBOL', ont="BP", pvalueCutoff= 1,qvalueCutoff= 1)
p_BP <- barplot(ego_BP,showCategory = 10, label_format = 100) ggtitle("Biological process") #label_format 是指label一行显示的字符长度
p_CC <- barplot(ego_CC,showCategory = 10, label_format = 100) ggtitle("Cellular component")
p_MF <- barplot(ego_MF,showCategory = 10, label_format = 100) ggtitle("Molecular function")
plotc <- p_BP/p_CC/p_MF
plotc
ggsave('result/6.enrichGO.png', plotc, width = 10,height = 16)
ego_BP <- data.frame(ego_BP)
ego_CC <- data.frame(ego_CC)
ego_MF <- data.frame(ego_MF)
write.csv(ego_BP,'result/6.enrichGO_BP.csv')
write.csv(ego_CC,'result/6.enrichGO_CC.csv')
write.csv(ego_MF,'result/6.enrichGO_MF.csv')
## === KEGG
genelist <- bitr(gene=DEG, fromType="SYMBOL", toType="ENTREZID", OrgDb='org.Hs.eg.db')
genelist <- pull(genelist,ENTREZID)
ekegg <- enrichKEGG(gene = genelist, organism = 'hsa', pvalueCutoff = 1, qvalueCutoff = 1)
p1 <- barplot(ekegg, showCategory=10,label_format=100)
p2 <- dotplot(ekegg, showCategory=10,label_format=100)
plotc = p1/p2
plotc
ggsave('result/6.enrichKEGG.png', plot = plotc, width = 8, height = 10)
ekegg <- data.frame(ekegg)
write.csv(ekegg,'result/6.enrichKEGG.csv')
## === 其他数据库通路
geneset <- read.gmt("data/MsigDB/v7.4/h.all.v7.4.symbols.gmt")
table(geneset$term)
geneset$term <- gsub(pattern = "HALLMARK_","", geneset$term)
geneset$term <- str_to_title(geneset$term)
my_path <- enricher(gene=DEG, pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE=geneset)
p1 <- barplot(my_path, showCategory=15,color = "pvalue")
p1
ggsave("result/6.enrich_HALLMARK.png", plot = p1, width = 8, height = 7)
my_path <- data.frame(my_path)
write.csv(my_path,"result/6.enrich_HALLMARK.csv")
3.功能富集 GSEA &GSVA
GSEA:基因集表达分析
总共有9大类