单细胞数据为什么要去除批次效应?

2020-04-01 16:12:34 浏览数 (1)

做单细胞测序分析时,我们往往会用到不同时期或不同测序平台产生的数据。即使是同一种细胞类型,来自不同批次的细胞也可能完全无法聚到同一个类群中。如下所示,这两个数据集是不同时期对同一种细胞测得的,在降维图上却几乎没有交集。因此,分析时需要先去除批次效应。

去除批次效应前:

代码语言:R
# Baseline WITHOUT batch correction: merge the two replicates, normalize them
# jointly with SCTransform, then reduce, cluster and plot. The plots coloured
# by orig.ident show how strongly the cells separate by batch.

# Initialize the Seurat objects from the raw (non-normalized) count matrices.
# Each count-matrix variable is overwritten by the Seurat object built from it.
P1_rep1 <- CreateSeuratObject(
  counts = P1_rep1, project = "P1_rep1", min.cells = 3, min.features = 200
)
P1_rep2 <- CreateSeuratObject(
  counts = P1_rep2, project = "P1_rep2", min.cells = 3, min.features = 200
)

# Plain merge of the two objects; no batch correction is applied here.
P0P1_add <- merge(x = P1_rep1, y = P1_rep2)

# Percentage of counts coming from features whose names match "^Mt-".
# NOTE(review): the pattern is case-sensitive -- confirm that "Mt-" is the
# mitochondrial gene prefix used by this annotation (other annotations use
# "MT-" or "mt-"); a non-matching pattern yields percent.mt == 0 everywhere.
P0P1_add[["percent.mt"]] <- PercentageFeatureSet(P0P1_add, pattern = "^Mt-")
VlnPlot(
  P0P1_add,
  features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
  ncol = 3,
  pt.size = 0
)

# SCTransform performs normalization, variable-feature selection and scaling
# in one step, so no separate NormalizeData() / FindVariableFeatures() /
# ScaleData() call follows it. The number of variable features is requested
# through `variable.features.n`; calling FindVariableFeatures() afterwards
# would overwrite the features SCTransform selected.
# Log-normalization alternative (not used):
# P0P1_add <- NormalizeData(object = P0P1_add, normalization.method = "LogNormalize", scale.factor = 1e4)
P0P1_add <- SCTransform(
  P0P1_add,
  vars.to.regress = "percent.mt",
  variable.features.n = 2500,
  verbose = FALSE
)

# PCA, then inspect the elbow plot. ndims must not exceed the number of PCs
# that were computed (npcs = 20).
P0P1_add.combined <- RunPCA(P0P1_add, npcs = 20, verbose = FALSE)
ElbowPlot(object = P0P1_add.combined, ndims = 20)

# UMAP / t-SNE embeddings and graph-based clustering on the first 12 PCs.
P0P1_add.combined <- RunUMAP(P0P1_add.combined, reduction = "pca", dims = 1:12)
P0P1_add.combined <- RunTSNE(P0P1_add.combined, reduction = "pca", dims = 1:12)
P0P1_add.combined <- FindNeighbors(P0P1_add.combined, reduction = "pca", dims = 1:12)
P0P1_add.combined <- FindClusters(P0P1_add.combined, resolution = 0.3)

# Visualization: coloured by sample of origin, then by cluster.
DimPlot(P0P1_add.combined, reduction = "umap", group.by = "orig.ident")
DimPlot(P0P1_add.combined, reduction = "umap", label = TRUE)

image.png

image.png

先整合数据再进行聚类,效果还是不太理想:

代码语言:R
# Anchor-based integration on log-normalized data: split the object by sample,
# normalize each part on its own, find anchors between the parts, integrate,
# then run the usual reduction / clustering steps on the "integrated" assay.
# NOTE(review): `P0P1` is not created in the snippets shown here; the
# ScaleData() call below needs a "percent.mt" metadata column on it -- confirm.

# One Seurat object per value of orig.ident.
P0P1.list <- SplitObject(P0P1, split.by = "orig.ident")

# Per-sample normalization followed by selection of 2000 variable features.
P0P1.list <- lapply(
  X = P0P1.list,
  FUN = function(obj) {
    obj <- NormalizeData(obj)
    FindVariableFeatures(obj, selection.method = "vst", nfeatures = 2000)
  }
)

# Find anchors between the samples and build the integrated object.
P0P1.anchors <- FindIntegrationAnchors(
  object.list = P0P1.list,
  dims = 1:20
)
P0P1.combined <- IntegrateData(
  anchorset = P0P1.anchors,
  dims = 1:20
)
DefaultAssay(P0P1.combined) <- "integrated"

# Standard downstream workflow: scale (regressing out percent.mt), then PCA.
P0P1.combined <- ScaleData(
  P0P1.combined,
  vars.to.regress = "percent.mt",
  verbose = FALSE
)
P0P1.combined <- RunPCA(P0P1.combined, npcs = 30, verbose = FALSE)
ElbowPlot(P0P1.combined)

# UMAP embedding and graph-based clustering on the first 12 PCs.
P0P1.combined <- RunUMAP(P0P1.combined, reduction = "pca", dims = 1:12)
P0P1.combined <- FindNeighbors(P0P1.combined, reduction = "pca", dims = 1:12)
P0P1.combined <- FindClusters(P0P1.combined, resolution = 0.2)

# Plots: coloured by sample of origin, then by cluster with labels.
DimPlot(P0P1.combined, reduction = "umap", group.by = "orig.ident")
DimPlot(P0P1.combined, reduction = "umap", label = TRUE)

image.png

image.png

换用另一种整合方法,并改用 SCTransform 进行标准化后,结果好多了,可以看到两个数据集基本能够重合。

代码语言:R
# SCTransform-based integration: normalize each sample with SCTransform,
# select shared integration features, find anchors with
# normalization.method = "SCT", integrate, then reduce / cluster / plot.
# NOTE(review): `P0P1` is not created in the snippets shown here; presumably
# it is the merged, un-normalized object -- confirm.

# Raise the `future.globals.maxSize` option to 4000 * 1024^2.
options(future.globals.maxSize = 4000 * 1024^2)

# One Seurat object per value of orig.ident, each normalized independently.
# lapply() keeps the list names and avoids the 1:length() index loop.
P0P1.list <- SplitObject(P0P1, split.by = "orig.ident")
P0P1.list <- lapply(X = P0P1.list, FUN = SCTransform, verbose = FALSE)

# Choose 3000 features for integration and prepare the objects for it.
P0P1.features <- SelectIntegrationFeatures(
  object.list = P0P1.list,
  nfeatures = 3000
)
P0P1.list <- PrepSCTIntegration(
  object.list = P0P1.list,
  anchor.features = P0P1.features,
  verbose = FALSE
)

# Anchor finding and integration; both calls use normalization.method = "SCT".
P0P1.anchors <- FindIntegrationAnchors(
  object.list = P0P1.list,
  normalization.method = "SCT",
  anchor.features = P0P1.features,
  verbose = FALSE
)
P0P1.integrated <- IntegrateData(
  anchorset = P0P1.anchors,
  normalization.method = "SCT",
  verbose = FALSE
)

# No ScaleData() call is made between integration and PCA in this workflow.
P0P1.integrated <- RunPCA(P0P1.integrated, verbose = FALSE)
ElbowPlot(P0P1.integrated)

# UMAP embedding and graph-based clustering on the first 12 PCs.
P0P1.integrated <- RunUMAP(P0P1.integrated, dims = 1:12)
P0P1.integrated <- FindNeighbors(P0P1.integrated, reduction = "pca", dims = 1:12)
P0P1.integrated <- FindClusters(P0P1.integrated, resolution = 0.2)

# combine = FALSE returns the two plots (by sample, by cluster) as a list
# instead of a single combined figure.
DimPlot(
  P0P1.integrated,
  group.by = c("orig.ident", "seurat_clusters"),
  combine = FALSE
)

image.png

image.png

0 人点赞