1.GO富集分析
代码语言:javascript复制rm(list = ls())
load(file = 'step4output.Rdata')
library(clusterProfiler)
library(ggthemes)
library(org.Hs.eg.db)
library(dplyr)
library(ggplot2)
library(stringr)
library(enrichplot)
1)输入数据
代码语言:javascript复制gene_up = deg$ENTREZID[deg$change == 'up']
gene_down = deg$ENTREZID[deg$change == 'down']
gene_diff = c(gene_up,gene_down)
!如何避免运行限速步骤
代码语言:javascript复制# 初阶版本
if(F){
a = 1 #假装是限速步骤
#保存运行结果,下次运行到这里时直接加载结果
save(a,file = "a.Rdata")
}
load("a.Rdata")
# 高阶版本
f = "a.Rdata"
if(!file.exists(f)){
#只有f文件(a.Rdata)在工作目录下不存在时才运行,否则跳过这段代码
a = 1 #假装是限速步骤
print("bye")
#保存运行结果,下次运行到这里时直接加载结果
save(a,file = f)
}
load(f)
# 完美版本
q = "124"
f = paste0(q,"a.Rdata")
if(!file.exists(f)){
#只有f文件(a.Rdata)在工作目录下不存在时才运行,否则跳过这段代码
a = 1 #假装是限速步骤
print("bye")
#保存运行结果,下次运行到这里时直接加载结果
save(a,file = f)
}
load(f)
2)富集
代码语言:javascript复制#以下步骤耗时很长,设置了存在即跳过
f = paste0(gse_number,"_GO.Rdata")
if(!file.exists(f)){
ego <- enrichGO(gene = gene_diff,
OrgDb= org.Hs.eg.db,
ont = "ALL",
readable = TRUE) #直接将entrezid id 翻译为symbol id
ego_BP <- enrichGO(gene = gene_diff,
OrgDb= org.Hs.eg.db,
ont = "BP",
readable = TRUE)
#ont参数:One of "BP", "MF", and "CC" subontologies, or "ALL" for all three.
save(ego,ego_BP,file = f)
}
load(f)
3)可视化
代码语言:javascript复制#条带图
barplot(ego)
barplot(ego, split = "ONTOLOGY", font.size = 10,
showCategory = 5)
facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y")
#气泡图
dotplot(ego)
dotplot(ego, split = "ONTOLOGY", font.size = 10,
showCategory = 5)
facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y")
#(3)展示top通路的共同基因,要放大看。
#gl 用于设置下图的颜色
gl = deg$logFC
names(gl)=deg$ENTREZID
#Gene-Concept Network,要放大看
cnetplot(ego,
#layout = "star",
color.params = list(foldChange = gl),
showCategory = 3)
2.KEGG富集
可能会没有结果,因为数据库里的数据很少
代码语言:javascript复制# 2.KEGG pathway analysis----
#上调、下调、差异、所有基因
#(1)输入数据
gene_up = deg[deg$change == 'up','ENTREZID']
gene_down = deg[deg$change == 'down','ENTREZID']
gene_diff = c(gene_up,gene_down)
#(2)对上调/下调/所有差异基因进行富集分析
f2 = paste0(gse_number,"_KEGG.Rdata")
if(!file.exists(f2)){
kk.up <- enrichKEGG(gene = gene_up,
organism = 'hsa')
kk.down <- enrichKEGG(gene = gene_down,
organism = 'hsa')
kk.diff <- enrichKEGG(gene = gene_diff,
organism = 'hsa')
save(kk.diff,kk.down,kk.up,file = f2)
}
load(f2)
#(3)看看富集到了吗?https://mp.weixin.qq.com/s/NglawJgVgrMJ0QfD-YRBQg
table(kk.diff@result$p.adjust<0.05)
table(kk.up@result$p.adjust<0.05)
table(kk.down@result$p.adjust<0.05)
富集不到的补救措施:
代码语言:javascript复制#(4)双向图
# 富集分析所有图表默认都是用p.adjust,富集不到可以退而求其次用p值,在文中说明即可
source("kegg_plot_function.R")
g_kegg <- kegg_plot(kk.up,kk.down)
g_kegg
#g_kegg scale_y_continuous(labels = c(4,2,0,2,4,6))
3.辅助资料
# GSEA:https://www.yuque.com/docs/share/a67a180f-dd2b-4f6f-96c2-68a4b86fe862?#
# 富集分析学习更多:http://yulab-smu.top/clusterProfiler-book/index.html
# 弦图:https://www.yuque.com/xiaojiewanglezenmofenshen/dbwkg1/dgs65p
# GOplot:https://mp.weixin.qq.com/s/LonwdDhDn8iFUfxqSJ2Wew
# 网上的资料和宝藏无穷无尽,学好R语言慢慢发掘~
4.问题数据和常见错误分析
>代码和ppt来源于生信技能树