欢迎关注R语言数据分析指南
加载R包
代码语言:javascript
复制library(WGCNA)
library(tidyverse)
library(cowplot)
导入数据
代码语言:javascript
复制gene_exp <- read.table(file ='heatmap.txt',row.names=1)
datExpr0 <- t(gene_exp) # 转置
数据过滤
代码语言:javascript
复制# 缺失数据及无波动数据过滤
gsg <- goodSamplesGenes(datExpr0,minFraction = 1/2) #基因的缺失数据比例阈值
datExpr <- datExpr0[gsg$goodSamples, gsg$goodGenes]
# 通过聚类,查看是否有明显异常样本, 如果有需要剔除掉
plot(hclust(dist(datExpr)),cex.lab = 1.5,cex.axis = 1.5,cex.main = 2)
代码语言:javascript
复制enableWGCNAThreads(nThreads =40) # 设置线程数
# 通过对 power 的多次迭代,确定最佳 power
sft <- pickSoftThreshold(datExpr, powerVector = 1:20, # 尝试 1 到 20
networkType="unsigned")
fig_power1 <- ggplot(data = sft$fitIndices,aes(x = Power,y = SFT.R.sq))
geom_point(color = 'red')
geom_text_repel(aes(label = Power))
geom_hline(aes(yintercept = 0.85), color = 'red')
labs(title = 'Scale independence',
x = 'Soft Threshold (power)',
y = 'Scale Free Topology Model Fit,signed R^2')
theme_few()
theme(plot.title = element_text(hjust = 0.5))
fig_power2 <- ggplot(data = sft$fitIndices,
aes(x = Power,
y = mean.k.))
geom_point(color = 'red')
geom_text_repel(aes(label = Power))
labs(title = 'Mean connectivity',
x = 'Soft Threshold (power)',
y = 'Mean Connectivity')
theme_few()
theme(plot.title = element_text(hjust = 0.5))
plot_grid(fig_power1, fig_power2)
代码语言:javascript
复制sft$powerEstimate # 最佳B值
代码语言:javascript
复制net <- blockwiseModules(datExpr, # 输入数据
corType = "pearson", # 相关系数算法,pearson|bicor
power = 5, # 前面得到的 soft power
networkType = "unsigned", # unsigned | signed | signed hybrid
# 计算 TOM 矩阵
TOMType = "unsigned", # none | unsigned | signed
saveTOMs = TRUE,
saveTOMFileBase = "blockwiseTOM",
# 聚类并划分模块
# deepSplit = 0, # 0|1|2|3|4, 值越大得到的模块就越多越小,不设置则使用默认值
minModuleSize =30,
mergeCutHeight = 0.25, # 对距离小于mergeCutHeight的模块进行合并
numericLabels = FALSE, # 以数字命名模块
nThreads = 0, # 0 则使用所有可用线程
maxBlockSize = 100000 # 需要大于基因的数目
)
代码语言:javascript
复制table(net$colors) # 查看每个模块包含基因数目
save(net,file="net.Rdata") # 结果保存
代码语言:javascript
复制 1416 4980 4271 139 118 107
darkorange darkred darkturquoise green greenyellow grey
100 124 112 2119 151 1014
grey60 lightcyan lightgreen lightyellow magenta midnightblue
137 138 136 133 175 138
orange paleturquoise pink purple red royalblue
101 58 1249 166 1720 127
saddlebrown salmon skyblue steelblue tan turquoise
80 143 89 69 148 5221
violet white yellow
36 96 3898
WGCNA的结果导出
代码语言:javascript
复制wgcna_result <- data.frame(gene_id = names(net$colors),module = net$colors)
wgcna_result %>% write.table(file="wgcna_result.xls",row.names = F,sep="t",quote = F)
绘制聚类图与模块
代码语言:javascript
复制plotDendroAndColors(
dendro = net$dendrograms[[1]],
colors = net$colors,
groupLabels = "Module colors",
dendroLabels = FALSE,
addGuide = TRUE)
代码语言:javascript
复制rdatTraits <- read.table("group.xls",sep="t",header = T,row.names = 1)
design=model.matrix(~0 datTraits$group)
colnames(design)=levels(factor(datTraits$group))
rownames(design) <- rownames(datTraits)
design %<>% as.data.frame()
moduleTraitCor <- cor(net$MEs, design,use = "p",method = 'pearson')
# 计算 Pvalue
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor,nrow(datExpr))
代码语言:javascript
复制sizeGrWindow(10,6)
# 连接相关性和 pvalue
textMatrix <- paste(signif(moduleTraitCor, 2), "n(",
signif(moduleTraitPvalue, 1), ")", sep = "");
dim(textMatrix) <- dim(moduleTraitCor)
par(mar = c(6,10,3,3))
labeledHeatmap(Matrix = moduleTraitCor,
xLabels = names(design),yLabels = names(net$MEs),
ySymbols = names(net$MEs),colorLabels = FALSE,
colors = blueWhiteRed(50),textMatrix = textMatrix,
setStdMargins = FALSE,cex.text = 0.5,zlim = c(-1,1),
main = paste("Module-trait relationships"))
moduleTraitCor %>% write.table(file="moduleTraitCor.xls",sep="t",quote = F)
模块的导出
代码语言:javascript
复制my_modules <- c('yellow')
# 提取该模块的表达矩阵
m_wgcna_result <- filter(wgcna_result, module %in% my_modules)
m_datExpr <- datExpr[, m_wgcna_result$gene_id]
# 计算该模块的 TOM 矩阵
m_TOM <- TOMsimilarityFromExpr(m_datExpr,
power = 20,networkType = "unsigned",
TOMType = "unsigned")
dimnames(m_TOM) <- list(colnames(m_datExpr), colnames(m_datExpr))
# 导出 Cytoscape 输入文件
cyt <- exportNetworkToCytoscape(m_TOM,
edgeFile = "CytoscapeInput-yellow-network.txt",
weighted = TRUE,threshold = 0.2)
MEs <- net$MEs # 修改名称
colnames(MEs) <- str_remove(colnames(MEs),"ME")
save(MEs,design,file="df.Rdata") # 结果保存