生信小课堂(4) R中轻松进行WGCNA分析

2023-10-24 14:01:15 浏览数 (1)

欢迎关注R语言数据分析指南

加载R包

代码语言:javascript复制
library(WGCNA)
library(tidyverse)
library(cowplot)

导入数据

代码语言:javascript复制
gene_exp <- read.table(file ='heatmap.txt',row.names=1)
datExpr0 <- t(gene_exp) # 转置

数据过滤

代码语言:javascript复制
# 缺失数据及无波动数据过滤
gsg <- goodSamplesGenes(datExpr0,minFraction = 1/2) #基因的缺失数据比例阈值
datExpr <- datExpr0[gsg$goodSamples, gsg$goodGenes]

# 通过聚类,查看是否有明显异常样本, 如果有需要剔除掉
plot(hclust(dist(datExpr)),cex.lab = 1.5,cex.axis = 1.5,cex.main = 2)
代码语言:javascript复制
enableWGCNAThreads(nThreads =40) # 设置线程数
# 通过对 power 的多次迭代,确定最佳 power
sft <- pickSoftThreshold(datExpr, powerVector = 1:20, # 尝试 1 到 20
                         networkType="unsigned")

fig_power1 <- ggplot(data = sft$fitIndices,aes(x = Power,y = SFT.R.sq))  
  geom_point(color = 'red')  
  geom_text_repel(aes(label = Power))  
  geom_hline(aes(yintercept = 0.85), color = 'red')  
  labs(title = 'Scale independence',
       x = 'Soft Threshold (power)',
       y = 'Scale Free Topology Model Fit,signed R^2')  
  theme_few()  
  theme(plot.title = element_text(hjust = 0.5))

fig_power2 <- ggplot(data = sft$fitIndices,
                     aes(x = Power,
                         y = mean.k.))  
  geom_point(color = 'red')  
  geom_text_repel(aes(label = Power))  
  labs(title = 'Mean connectivity',
       x = 'Soft Threshold (power)',
       y = 'Mean Connectivity')  
  theme_few() 
  theme(plot.title = element_text(hjust = 0.5))

plot_grid(fig_power1, fig_power2)
代码语言:javascript复制
sft$powerEstimate  #  最佳B值
代码语言:javascript复制
net <- blockwiseModules(datExpr,  # 输入数据
  corType = "pearson", # 相关系数算法,pearson|bicor
  power = 5, # 前面得到的 soft power
  networkType = "unsigned", # unsigned | signed | signed hybrid
  # 计算 TOM 矩阵
  TOMType = "unsigned", # none | unsigned | signed
  saveTOMs = TRUE,
  saveTOMFileBase = "blockwiseTOM",
  # 聚类并划分模块
 # deepSplit = 0, # 0|1|2|3|4, 值越大得到的模块就越多越小,不设置则使用默认值
  minModuleSize =30,
  mergeCutHeight = 0.25,  #  对距离小于mergeCutHeight的模块进行合并
  numericLabels = FALSE, # 以数字命名模块
  nThreads = 0, # 0 则使用所有可用线程
  maxBlockSize = 100000 # 需要大于基因的数目
)
代码语言:javascript复制
table(net$colors) # 查看每个模块包含基因数目
save(net,file="net.Rdata") # 结果保存
代码语言:javascript复制
         1416          4980          4271           139           118           107 
   darkorange       darkred darkturquoise         green   greenyellow          grey 
          100           124           112          2119           151          1014 
       grey60     lightcyan    lightgreen   lightyellow       magenta  midnightblue 
          137           138           136           133           175           138 
       orange paleturquoise          pink        purple           red     royalblue 
          101            58          1249           166          1720           127 
  saddlebrown        salmon       skyblue     steelblue           tan     turquoise 
           80           143            89            69           148          5221 
       violet         white        yellow 
           36            96          3898 

WGCNA的结果导出

代码语言:javascript复制
wgcna_result <- data.frame(gene_id = names(net$colors),module = net$colors)

wgcna_result %>% write.table(file="wgcna_result.xls",row.names = F,sep="t",quote = F)

绘制聚类图与模块

代码语言:javascript复制
plotDendroAndColors(
    dendro = net$dendrograms[[1]], 
    colors = net$colors,
    groupLabels = "Module colors",
    dendroLabels = FALSE, 
    addGuide = TRUE)
代码语言:javascript复制
rdatTraits <- read.table("group.xls",sep="t",header = T,row.names = 1)
design=model.matrix(~0  datTraits$group)
colnames(design)=levels(factor(datTraits$group))
rownames(design) <- rownames(datTraits)

design %<>% as.data.frame()
moduleTraitCor <- cor(net$MEs, design,use = "p",method = 'pearson')
  
  # 计算 Pvalue
moduleTraitPvalue <- corPvalueStudent(moduleTraitCor,nrow(datExpr))
代码语言:javascript复制
sizeGrWindow(10,6)
# 连接相关性和 pvalue
textMatrix <- paste(signif(moduleTraitCor, 2), "n(",
                      signif(moduleTraitPvalue, 1), ")", sep = "");
  dim(textMatrix) <- dim(moduleTraitCor)
  par(mar = c(6,10,3,3))
  labeledHeatmap(Matrix = moduleTraitCor,
  xLabels = names(design),yLabels = names(net$MEs),
  ySymbols = names(net$MEs),colorLabels = FALSE,
  colors = blueWhiteRed(50),textMatrix = textMatrix,
  setStdMargins = FALSE,cex.text = 0.5,zlim = c(-1,1),
  main = paste("Module-trait relationships"))

moduleTraitCor %>% write.table(file="moduleTraitCor.xls",sep="t",quote = F)

模块的导出

代码语言:javascript复制
my_modules <- c('yellow')
# 提取该模块的表达矩阵
m_wgcna_result <- filter(wgcna_result, module %in% my_modules)
m_datExpr <- datExpr[, m_wgcna_result$gene_id]
# 计算该模块的 TOM 矩阵  
m_TOM <- TOMsimilarityFromExpr(m_datExpr,
    power = 20,networkType = "unsigned",
    TOMType = "unsigned")
dimnames(m_TOM) <- list(colnames(m_datExpr), colnames(m_datExpr))

# 导出 Cytoscape 输入文件
cyt <- exportNetworkToCytoscape(m_TOM,
    edgeFile = "CytoscapeInput-yellow-network.txt",
    weighted = TRUE,threshold = 0.2)

MEs <- net$MEs # 修改名称
colnames(MEs) <- str_remove(colnames(MEs),"ME")
save(MEs,design,file="df.Rdata") # 结果保存

0 人点赞