差异分析可视化
代码语言:javascript复制rm(list = ls())
load(file = "step1output.Rdata")
load(file = "step4output.Rdata")
# 火山图
library(dplyr)
library(ggplot2)
dat = distinct(deg,symbol,.keep_all = T)
p <- ggplot(data = dat,
aes(x = logFC,
y = -log10(P.Value)))
geom_point(alpha=0.4, size=3.5,
aes(color=change))
scale_color_manual(values=c("blue", "grey","red"))
geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",linewidth=0.8)
geom_hline(yintercept = -log10(p_t),lty=4,col="black",linewidth=0.8)
theme_bw()#去掉灰色背景
p
for_label <- dat%>%
filter(symbol %in% c("HADHA","LRRFIP1"))
volcano_plot <- p
geom_point(size = 3, shape = 1, data = for_label)
ggrepel::geom_label_repel(
aes(label = symbol),
data = for_label,
color="black"
)
volcano_plot
差异基因热图
代码语言:javascript复制load(file = 'step2output.Rdata')
# 表达矩阵行名替换
exp = exp[dat$probe_id,]
rownames(exp) = dat$symbol
if(T){
#取前10上调和前10下调
library(dplyr)
dat2 = dat %>%
filter(change!="stable") %>%
arrange(logFC)
cg = c(head(dat2$symbol,10),
tail(dat2$symbol,10))
}else{
全部差异基因
代码语言:javascript复制 cg = dat$symbol[dat$change !="stable"]
length(cg)
}
n=exp[cg,]
dim(n)
差异基因热图
代码语言:javascript复制library(pheatmap)
annotation_col=data.frame(group=Group)
rownames(annotation_col)=colnames(n)
heatmap_plot <- pheatmap(n,show_colnames =F#不展示行名,
scale = "row",
#cluster_cols = F,
annotation_col=annotation_col,
breaks = seq(-3,3,length.out = 100)
)
heatmap_plot
拼图
代码语言:javascript复制library(patchwork)
volcano_plot heatmap_plot$gtable
感兴趣基因的相关性
代码语言:javascript复制library(corrplot)
g = sample(deg$symbol[1:500],10) # 这里是随机取样,注意换成自己感兴趣的基因
g
M = cor(t(exp[g,]))#计算列与列之间的相关性
pheatmap(M)
library(paletteer)
my_color = rev(paletteer_d("RColorBrewer::RdYlBu"))#选出一组颜色
my_color = colorRampPalette(my_color)(10)#切成十种颜色
corrplot(M, type="upper",
method="pie",
order="hclust",
col=my_color,
tl.col="black",
tl.srt=45)
library(cowplot)
cor_plot <- recordPlot()
拼图
代码语言:javascript复制load("pca_plot.Rdata")
plot_grid(pca_plot,cor_plot,
volcano_plot,heatmap_plot$gtable)
dev.off()
保存
代码语言:javascript复制pdf("deg.pdf")
plot_grid(pca_plot,cor_plot,
volcano_plot,heatmap_plot$gtable)
dev.off()
如何确认自己的差异分析分组反了没有
代码语言:javascript复制a = deg$symbol[1]
boxplot(exp[a,]~Group)
deg$logFC[1]
富集分析数据库
KEGG数据库
GO数据库
Y叔和clusterProfiler
富集分析:衡量每个通路里的基因在差异基因里是否足够多
GO 富集分析
代码语言:javascript复制rm(list = ls())
load(file = 'step4output.Rdata')
library(clusterProfiler)
library(ggthemes)
library(org.Hs.eg.db)
library(dplyr)
library(ggplot2)
library(stringr)
library(enrichplot)
#(1)输入数据
gene_up = deg$ENTREZID[deg$change == 'up']
gene_down = deg$ENTREZID[deg$change == 'down']
gene_diff = c(gene_up,gene_down)
#(2)富集
#以下步骤耗时很长,设置了存在即跳过
f = paste0(gse_number,"_GO.Rdata")
if(!file.exists(f)){
ego <- enrichGO(gene = gene_diff,
OrgDb= org.Hs.eg.db,
ont = "ALL",
readable = TRUE)
ego_BP <- enrichGO(gene = gene_diff,
OrgDb= org.Hs.eg.db,
ont = "BP",
readable = TRUE)
#ont参数:One of "BP", "MF", and "CC" subontologies, or "ALL" for all three.
save(ego,ego_BP,file = f)
}
load(f)
#(3)可视化
#条带图
barplot(ego)
barplot(ego, split = "ONTOLOGY", font.size = 10,
showCategory = 5)
facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y")
#气泡图
dotplot(ego)
dotplot(ego, split = "ONTOLOGY", font.size = 10,
showCategory = 5)
facet_grid(ONTOLOGY ~ ., space = "free_y",scales = "free_y")
#(3)展示top通路的共同基因,要放大看。
#gl 用于设置下图的颜色
gl = deg$logFC
names(gl)=deg$ENTREZID
#Gene-Concept Network,要放大看
cnetplot(ego,
#layout = "star",
color.params = list(foldChange = gl),
showCategory = 3)
# 2.KEGG pathway analysis----
#上调、下调、差异、所有基因
#(1)输入数据
gene_up = deg[deg$change == 'up','ENTREZID']
gene_down = deg[deg$change == 'down','ENTREZID']
gene_diff = c(gene_up,gene_down)
#(2)对上调/下调/所有差异基因进行富集分析
f2 = paste0(gse_number,"_KEGG.Rdata")
if(!file.exists(f2)){
kk.up <- enrichKEGG(gene = gene_up,
organism = 'hsa')
kk.down <- enrichKEGG(gene = gene_down,
organism = 'hsa')
kk.diff <- enrichKEGG(gene = gene_diff,
organism = 'hsa')
save(kk.diff,kk.down,kk.up,file = f2)
}
load(f2)
#(3)看看富集到了吗?https://mp.weixin.qq.com/s/NglawJgVgrMJ0QfD-YRBQg
table(kk.diff@result$p.adjust<0.05)
table(kk.up@result$p.adjust<0.05)
table(kk.down@result$p.adjust<0.05)
#(4)双向图
# 富集分析所有图表默认都是用p.adjust,富集不到可以退而求其次用p值,在文中说明即可
source("kegg_plot_function.R")
g_kegg <- kegg_plot(kk.up,kk.down)
g_kegg
#g_kegg scale_y_continuous(labels = c(2,0,2,4,6))