表达矩阵下载及分组处理
代码语言:text
复制rm(list = ls())
library(GEOquery)
library(stringr)
gse = "GSE83521"
eSet1 <- getGEO("GSE83521",
destdir = '.',
getGPL = F)
eSet2 <- getGEO("GSE89143",
destdir = '.',
getGPL = F)
#(1)提取表达矩阵exp
exp1 <- exprs(eSet1[[1]])
exp1[1:4,1:4]
exp2 <- exprs(eSet2[[1]])
exp2[1:4,1:4]
range(exp1)
range(exp2)
exp2 = log2(exp2 1)
table(rownames(exp1) %in% rownames(exp2))
length(intersect(rownames(exp1),rownames(exp2)))
exp1 <- exp1[intersect(rownames(exp1),rownames(exp2)),]
exp2 <- exp2[intersect(rownames(exp1),rownames(exp2)),]
boxplot(exp1)
boxplot(exp2)
#(2)提取临床信息
pd1 <- pData(eSet1[[1]])
pd2 <- pData(eSet2[[1]])
if(!identical(rownames(pd1),colnames(exp1))) exp1 = exp1[,match(rownames(pd1),colnames(exp1))]
if(!identical(rownames(pd2),colnames(exp2))) exp2 = exp2[,match(rownames(pd2),colnames(exp2))]
#(3)提取芯片平台编号
gpl <- eSet2[[1]]@annotation
#(4)合并表达矩阵
# exp2的第三个样本有些异常,可以去掉或者用normalizeBetweenArrays标准化,把它拉回正常水平。
exp2=limma::normalizeBetweenArrays(exp2)
boxplot(exp2)
# exp2 = exp2[,-3]
exp = cbind(exp1,exp2)
boxplot(exp)
Group1 = ifelse(str_detect(pd1$title,"Tumour"),"Tumour","Normal")
# Group2 = ifelse(str_detect(pd2$source_name_ch1,"Paracancerous"),"Normal","Tumour")[-3]
Group2 = ifelse(str_detect(pd2$source_name_ch1,"Paracancerous"),"Normal","Tumour")
Group = c(Group1,Group2)
table(Group)
Group = factor(Group,levels = c("Normal","Tumour"))
save(gse,Group,exp,gpl,file = "exp.Rdata")
处理批次效应
方法一
代码语言:text
复制rm(list = ls())
load("exp.Rdata")
#处理批次效应
library(limma)
#?removeBatchEffect()
# batch <- c(rep("A",12),rep("B",5))
batch <- c(rep("A",12),rep("B",6))
exp2 <- removeBatchEffect(exp, batch)
par(mfrow=c(1,2))
boxplot(as.data.frame(exp),main="Original")
boxplot(as.data.frame(exp2),main="Batch corrected")
?par#学习函数
方法二
代码语言:text
复制rm(list = ls())
load("exp.Rdata")
#处理批次效应(combat)
# if (!require("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#
# BiocManager::install("sva")
library(sva)
#?ComBat
# batch <- c(rep("A",12),rep("B",5))
batch <- c(rep("A",12),rep("B",6))
mod = model.matrix(~Group)
exp2 = ComBat(dat=exp, batch=batch,
mod=mod, par.prior=TRUE, ref.batch="A")
par(mfrow=c(1,2))
boxplot(as.data.frame(exp),main="Original")
boxplot(as.data.frame(exp2),main="Batch corrected")