差异分析
芯片差异分析所需要的输入数据
代码语言:text复制fviz_pca_ind(iris.pca,
geom.ind = "point", # show points only (nbut not "text")
col.ind = iris$Species, # color by groups
palette = c("#00AFBB", "#E7B800", "#FC4E07"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups"
)
输入数据准备:
代码语言:text复制rm(list = ls())
load(file = "step2output.Rdata")
#输入数据:exp和Group
#Principal Component Analysis
#http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials
# fviz_pca_ind(iris.pca,
# geom.ind = "point", # show points only (nbut not "text")
# col.ind = iris$Species, # color by groups
# palette = c("#00AFBB", "#E7B800", "#FC4E07"),
# addEllipses = TRUE, # Concentration ellipses
# legend.title = "Groups"
# )
##ctrl shift C批量注释
PCA 图
代码语言:text复制dat=as.data.frame(t(exp))
library(FactoMineR)
library(factoextra)
dat.pca <- PCA(dat, graph = FALSE)
fviz_pca_ind(dat.pca,
geom.ind = "point", # show points only (nbut not "text")
col.ind = Group, # color by groups
palette = c("#00AFBB", "#E7B800"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups"
)
# 2.top 1000 sd 热图----
###看一下数据,差异基因或者组内差异较大的基因;
g = names(tail(sort(apply(exp,1,sd)),1000)) #day7-apply的思考题
n = exp[g,]
library(pheatmap)
annotation_col = data.frame(row.names = colnames(n),
Group = Group)
##分组信息;
ann_colors = list(Group = c(Disease="#1B9E77", Normal="firebrick"))
##添加颜色;
pheatmap(n,
show_colnames =F,
show_rownames = F,
annotation_col=annotation_col,
annotation_colors = ann_colors,
color = colorRampPalette(c("navy", "white", "firebrick3"))(100),
scale = "row", #按行标准化,只保留行内差别,不保留行间差别,会把数据范围缩放到大概-5~5之间
breaks = seq(-3,3,length.out = 100) #设置色带分布范围为-3~3之间,超出此范围的数字显示极限颜色
)
?pheatmap
# 关于scale的进一步学习:zz.scale.R
芯片分析后的数据整理:
二分组差异分析
代码语言:text复制rm(list = ls())
load(file = "step2output.Rdata")
#差异分析
library(limma)
design = model.matrix(~Group)#模型矩阵
fit = lmFit(exp,design)#线性拟合
fit = eBayes(fit)#贝叶斯检验
deg = topTable(fit,coef = 2,number = Inf)#提取结果
##函数表示
analy=function(exp,Group){
design = model.matrix(~Group)#模型矩阵
fit = lmFit(exp,design)#线性拟合
fit = eBayes(fit)#贝叶斯检验
deg = topTable(fit,coef = 2,number = Inf)#提取结果
}
deg2=analy(exp,Group)
identical(deg,deg2)
多个探针对应一个基因
随机去重、保留平均值最大的探针或者取多个探针的平均值。
代码语言:text复制#为deg数据框添加几列
#1.加probe_id列,把行名变成一列
library(dplyr)
deg = mutate(deg,probe_id = rownames(deg))
?mutate
#2.加上探针注释
ids = distinct(ids,symbol,.keep_all = T)##随机去除重复值;
deg = inner_join(deg,ids,by="probe_id")
nrow(deg)
#3.加change列,标记上下调基因
logFC_t = 1
p_t = 0.05
k1 = (deg$P.Value < p_t)&(deg$logFC < -logFC_t)
k2 = (deg$P.Value < p_t)&(deg$logFC > logFC_t)
deg = mutate(deg,change = ifelse(k1,"down",ifelse(k2,"up","stable")))
table(deg$change)
#火山图
library(ggplot2)
ggplot(data = deg,
aes(x = logFC,
y = -log10(P.Value)))
geom_point(alpha=0.4, size=3.5,
aes(color=change))
scale_color_manual(values=c("blue", "grey","red"))
geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",linewidth=0.8)
geom_hline(yintercept = -log10(p_t),lty=4,col="black",linewidth=0.8)
theme_bw()##改灰色背景
代码语言:text复制# 差异基因热图----
# 表达矩阵行名替换为基因名
exp = exp[deg$probe_id,]
rownames(exp) = deg$symbol
diff_gene = deg$symbol[deg$change !="stable"]
n = exp[diff_gene,]
library(pheatmap)
annotation_col = data.frame(group = Group)
rownames(annotation_col) = colnames(n)
pheatmap(n,show_colnames =F,
show_rownames = F,
scale = "row",
#cluster_cols = F,
annotation_col=annotation_col,
breaks = seq(-3,3,length.out = 100)
)