单细胞测序—比较两个Seurat分析结果中细胞簇和细胞类型的对应关系

如果我们用两种不同的方法分析了同一个数据集，可以按如下方法比较两个 Seurat 分析结果中细胞簇和细胞类型的对应关系。

分析结果1

采用标准流程得到的Seurat对象

代码语言：r复制

# Restore the cell metadata saved by the standard Seurat workflow.
# The .Rdata file is expected to contain an object named `phe`, which
# load() places in the current environment.
load("phe-by-basic-seurat.Rdata")
# Keep it under a dedicated name so the later analysis can reuse `phe`.
phe_basic <- phe

分析结果2

将表达矩阵二值化：凡是表达量不为 0 的位置，一律设为 1

（没有背后的生物学意义，只是为了演示）

代码语言：r复制

# Analysis 2: rerun the standard Seurat workflow on a binarised count matrix
# (every non-zero count is replaced by 1) and keep the resulting cell metadata.
#
# The workspace is deliberately NOT cleared here (no `rm(list = ls())`):
# clearing it would delete `phe_basic`, which the comparison step still needs.
library(Seurat)
library(patchwork)
library(ggplot2)

pbmc.data <- Read10X(data.dir = "./filtered_gene_bc_matrices/hg19/")
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k",
                           min.cells = 3, min.features = 200)

# Binarise the raw counts: any non-zero expression value becomes 1.
ct <- pbmc@assays$RNA$counts
ct  # counts before binarisation
ct[ct > 0] <- 1
ct  # counts after binarisation
pbmc@assays$RNA$counts <- ct

# From here on the code is largely the same as the standard workflow (step 1).
# Normalise once; a second NormalizeData() call with default arguments would
# only repeat the same computation.
pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize",
                      scale.factor = 10000)
## Identify the 2000 most highly variable genes
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 2000)
## In addition we scale the data
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc),
               verbose = FALSE)
pbmc <- FindNeighbors(pbmc, dims = 1:10, verbose = FALSE)
pbmc <- FindClusters(pbmc, resolution = 0.5, verbose = FALSE)
pbmc <- RunUMAP(pbmc, dims = 1:10, umap.method = "uwot", metric = "cosine")
table(pbmc$seurat_clusters)
# pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25,  logfc.threshold = 0.25, verbose = FALSE)

# UMAP coloured by cluster, next to a dot plot of marker genes per cluster.
p1 <- DimPlot(pbmc, reduction = "umap", group.by = 'seurat_clusters',
              label = TRUE, pt.size = 0.5)
# ggplot layers are combined with `+`; without it the call does not parse.
p2 <- DotPlot(pbmc, features = c("MS4A1", "GNLY", "CD3E",
                                 "CD14", "FCER1A", "FCGR3A",
                                 "LYZ", "PPBP", "CD8A"),
              group.by = 'seurat_clusters') +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5))

# patchwork: `+` places the two plots side by side.
p1 + p2

# Save the per-cell metadata (including `seurat_clusters`) for later sessions
# and keep it in memory under a dedicated name for the comparison below.
# In a fresh session, use load(file = 'phe-by-0-1-matrix.Rdata') first.
phe <- pbmc@meta.data
save(phe, file = 'phe-by-0-1-matrix.Rdata')
phe_0_1 <- phe

比较

代码语言：r复制

# The cross-tabulation below pairs cells by position, so both metadata tables
# must list exactly the same cells in the same order.
same_cells <- identical(rownames(phe_0_1), rownames(phe_basic))
same_cells
# Stop here rather than draw a misleading plot from misaligned cells.
stopifnot(same_cells)

library(gplots)
# Rows: clusters from the standard workflow; columns: clusters from the
# binarised-matrix workflow. The dimension names are set so that it is clear
# which analysis each axis of the table belongs to.
cluster_xtab <- table(basic = phe_basic$seurat_clusters,
                      binarised = phe_0_1$seurat_clusters)
balloonplot(cluster_xtab)

identical(rownames(phe_0_1), rownames(phe_basic))

这行代码比较 phe_0_1 和 phe_basic 数据框的行名是否完全相同。如果相同，返回 TRUE；否则返回 FALSE。
这一步通常用来确保这两个数据框中的细胞是一一对应的，便于后续比较。

balloonplot(table(phe_basic$seurat_clusters, phe_0_1$seurat_clusters))

这行代码创建了一个交叉表，显示 phe_basic 和 phe_0_1 中细胞簇（seurat_clusters）的对应关系。
然后使用 balloonplot 函数可视化这个交叉表，显示两个分析结果之间的细胞簇对应关系。气球的大小表示落在对应簇组合中的细胞数量。

代码语言：r复制

# Map cluster ids to coarse cell-type labels.
#
# clusters:   vector (factor, character or numeric) of cluster ids, one per cell
# t_clusters: cluster ids to label "Tcells"
# b_clusters: cluster ids to label "Bcells"
# Returns a character vector the same length as `clusters`; every cluster not
# listed in `t_clusters` or `b_clusters` is labelled "myeloid".
label_cell_types <- function(clusters, t_clusters, b_clusters) {
  # Compare as character so factor levels and numeric ids match by label.
  clusters <- as.character(clusters)
  cell_type <- rep("myeloid", length(clusters))
  cell_type[clusters %in% as.character(b_clusters)] <- "Bcells"
  # Assigned last so the T-cell label wins if an id were listed in both sets.
  cell_type[clusters %in% as.character(t_clusters)] <- "Tcells"
  cell_type
}

# Cluster-to-type assignments are specific to each clustering result.
phe_0_1$type_by_0_1 <- label_cell_types(phe_0_1$seurat_clusters,
                                        t_clusters = c(0, 1, 2, 5, 8),
                                        b_clusters = 3)
table(phe_0_1$type_by_0_1)

phe_basic$type_by_basic <- label_cell_types(phe_basic$seurat_clusters,
                                            t_clusters = c(0, 2, 4, 6),
                                            b_clusters = 3)
table(phe_basic$type_by_basic)

# The cross-tabulation pairs cells by position, so both tables must describe
# the same cells in the same order.
stopifnot(identical(rownames(phe_basic), rownames(phe_0_1)))
table(phe_basic$type_by_basic, phe_0_1$type_by_0_1)

gplots::balloonplot(table(phe_basic$type_by_basic, phe_0_1$type_by_0_1))

数据挖掘生物基因

0 人点赞