主要方法:将其中某一组设置为实验组,其余几组统一设置为对照组。
- 第一步读取数据,合并表达矩阵和分组文件
#===========================================================================
#===========================================================================
rm(list = ls(all.names = TRUE))
options(stringsAsFactors = F)
library(Matrix)
setwd('D:\SCIwork\F38KRT\s2')
data <- read.csv('cdata.csv', header = T, row.names = 1)
data <- as.data.frame(t(data))
data[1:4,1:4]
normalized<-function(y) {
x<-y[!is.na(y)]
x<-(x - min(x)) / (max(x) - min(x))
y[!is.na(y)]<-x
return(y)
}
db <- as.data.frame(apply(data,2,normalized))
data <- db
data$sample <- rownames(data)
data$sample <- chartr(old='.', new='-', x=data$sample)
setwd('D:\SCIwork\F38KRT\s3')
group <- read.csv('group2.csv', header = T)
names(group)[1] <- 'sample'
group$sample <- chartr(old='.', new='-', x=group$sample)
group <- subset(group, select=c("sample", "group"))
group$subtype <- group$group
group$group <- NULL
dt <- merge(group, data, by='sample')
dt[1:4,1:4]
dt$sample <- NULL
table(dt$subtype)
dt_total <- dt
输入文件cdata经过T转置为data后如下所示:
上面的代码包括了,对表达量归一化的代码。
normalized<-function(y) { x<-y[!is.na(y)] x<-(x - min(x)) / (max(x) - min(x)) y[!is.na(y)]<-x return(y)}
经过这个代码,样本的表达量将会被转化到0-1之间的数值。
- 将subtype1设置为exp组,其余两组(subtype2,和subtype3)设置为con组。
#===========================================================================
#===========================================================================
dt$subtype <- ifelse(dt$subtype == 'Subtype1', 'Exp', 'Con')
table(dt$subtype)
dt <- dt[order(dt$subtype), ]
dt[1:4,1:4]
dt_Con <- subset(dt, dt$subtype == 'Con')
dt_Con[1:4,1:4]
dt_Exp <- subset(dt, dt$subtype == 'Exp')
dt_Exp[1:4,1:4]
dt_Con$subtype <- paste0(dt_Con$subtype, rownames(dt_Con))
rownames(dt_Con) <- dt_Con$subtype
dt_Con$subtype <- NULL
dt_Con <- as.data.frame(t(dt_Con))
dt_Exp$subtype <- paste0(dt_Exp$subtype, rownames(dt_Exp))
rownames(dt_Exp) <- dt_Exp$subtype
dt_Exp$subtype <- NULL
dt_Exp <- as.data.frame(t(dt_Exp))
Pvalue<-c(rep(0,nrow(dt_Con)))
log2_FC<-c(rep(0,nrow(dt_Con)))
for(i in 1:nrow(dt_Con)){
y=t.test(as.numeric(dt_Con[i,]),as.numeric(dt_Exp[i,]))
Pvalue[i] <- y$p.value
log2_FC[i] <-log2(mean(as.numeric(dt_Exp[i,]))/(mean(as.numeric(dt_Con[i,]))))
}
library(dplyr)
library(tidyr)
library(tibble)
# 对p value进行FDR校正
fdr=p.adjust(Pvalue, "BH")
# 在原文件后面加入log2FC,p value和FDR,共3列;
out<- as.data.frame(cbind(log2_FC,Pvalue,fdr))
out$gene <- rownames(dt_Con)
# out <- out %>%
# dplyr::filter(log2_FC > 0.5 & Pvalue < 0.05)
setwd('D:\SCIwork\F38KRT\s5')
write.csv(out, file = 'out_S1.csv')
循环T检验后求取差异基因的差异倍数和P值。
- 同样的逻辑,分别求取subtype2和subtype3的差异基因
#===========================================================================
#===========================================================================
dt <- dt_total
dt$subtype <- ifelse(dt$subtype == 'Subtype2', 'Exp', 'Con')
table(dt$subtype)
dt <- dt[order(dt$subtype), ]
dt[1:4,1:4]
dt_Con <- subset(dt, dt$subtype == 'Con')
dt_Con[1:4,1:4]
dt_Exp <- subset(dt, dt$subtype == 'Exp')
dt_Exp[1:4,1:4]
dt_Con$subtype <- paste0(dt_Con$subtype, rownames(dt_Con))
rownames(dt_Con) <- dt_Con$subtype
dt_Con$subtype <- NULL
dt_Con <- as.data.frame(t(dt_Con))
dt_Exp$subtype <- paste0(dt_Exp$subtype, rownames(dt_Exp))
rownames(dt_Exp) <- dt_Exp$subtype
dt_Exp$subtype <- NULL
dt_Exp <- as.data.frame(t(dt_Exp))
Pvalue<-c(rep(0,nrow(dt_Con)))
log2_FC<-c(rep(0,nrow(dt_Con)))
for(i in 1:nrow(dt_Con)){
y=t.test(as.numeric(dt_Con[i,]),as.numeric(dt_Exp[i,]))
Pvalue[i] <- y$p.value
log2_FC[i] <-log2(mean(as.numeric(dt_Exp[i,]))/(mean(as.numeric(dt_Con[i,]))))
}
# 对p value进行FDR校正
fdr=p.adjust(Pvalue, "BH")
# 在原文件后面加入log2FC,p value和FDR,共3列;
out<- as.data.frame(cbind(log2_FC,Pvalue,fdr))
out$gene <- rownames(dt_Con)
setwd('D:\SCIwork\F38KRT\s5')
write.csv(out, file = 'out_S2.csv')
#===========================================================================
#===========================================================================
dt <- dt_total
dt$subtype <- ifelse(dt$subtype == 'Subtype3', 'Exp', 'Con')
table(dt$subtype)
dt <- dt[order(dt$subtype), ]
dt[1:4,1:4]
dt_Con <- subset(dt, dt$subtype == 'Con')
dt_Con[1:4,1:4]
dt_Exp <- subset(dt, dt$subtype == 'Exp')
dt_Exp[1:4,1:4]
dt_Con$subtype <- paste0(dt_Con$subtype, rownames(dt_Con))
rownames(dt_Con) <- dt_Con$subtype
dt_Con$subtype <- NULL
dt_Con <- as.data.frame(t(dt_Con))
dt_Exp$subtype <- paste0(dt_Exp$subtype, rownames(dt_Exp))
rownames(dt_Exp) <- dt_Exp$subtype
dt_Exp$subtype <- NULL
dt_Exp <- as.data.frame(t(dt_Exp))
Pvalue<-c(rep(0,nrow(dt_Con)))
log2_FC<-c(rep(0,nrow(dt_Con)))
for(i in 1:nrow(dt_Con)){
y=t.test(as.numeric(dt_Con[i,]),as.numeric(dt_Exp[i,]))
Pvalue[i] <- y$p.value
log2_FC[i] <-log2(mean(as.numeric(dt_Exp[i,]))/(mean(as.numeric(dt_Con[i,]))))
}
# 对p value进行FDR校正
fdr=p.adjust(Pvalue, "BH")
# 在原文件后面加入log2FC,p value和FDR,共3列;
out<- as.data.frame(cbind(log2_FC,Pvalue,fdr))
out$gene <- rownames(dt_Con)
setwd('D:\SCIwork\F38KRT\s5')
write.csv(out, file = 'out_S3.csv')