单细胞RNA-seq揭示TNBC的异质性(图表复现01)

2021-03-04 14:33:50 浏览数 (2)

后起之秀奔涌而至,欢迎大家在《生信技能树》的舞台分享自己的心得体会!(文末有惊喜)

下面是《单细胞天地》小编日行一膳的投稿

前面的单细胞RNA-seq揭示TNBC的异质性(图表复现01),我们一起复现了“ Unravelling subclonal heterogeneity and aggressive disease states in TNBC through single-cell RNA-seq ” 的Figure 1 ,今天继续来复现一下Figure 2的相关内容:

复现图表

Fig2a
代码重复
1.操作准备流程
代码语言:javascript复制
# TSNE六个患者的全部细胞
set.seed(7)#作者不讲武德,如果不设置这个你的图基本上和作者的不一样(参考生信技能书之前的推文)
to_plot_ct <- unique(pd_ct$cell_types_cl_all)
mat_short_ct <- mat_ct[, which(pd_ct$cell_types_cl_all %in% to_plot_ct)]
pd_short_ct <- pd_ct[which(pd_ct$cell_types_cl_all %in% to_plot_ct), ]
tsne_short_ct <- Rtsne(t(mat_short_ct), perplexity = 30)
colnames(tsne_short_ct$Y) <- c("col1", "col2")
tsne_short_ct$Y <- as.data.frame(tsne_short_ct$Y)
tsne_short_ct$Y$cell_types_cl_all <- pd_short_ct$cell_types_cl_all
tsne_short_ct$Y$cell_types_markers <- pd_short_ct$cell_types_markers
tsne_short_ct$Y$patient <- pd_short_ct$patient
2.可视化操作
代码语言:javascript复制
ggplot(tsne_short_ct$Y, aes(x = col2, y = col1,shape = patient,color = factor(cell_types_cl_all, #这里需要两个坐标反一下,就可以得出和作者相同的图像
                                                               levels = names(anno_colors$tsne))))   
  geom_point(size = 4)   
  scale_color_manual(values = anno_colors$tsne)  
  labs(col = "patient", x = "Component 1", y = "Component 2", shape = "patient") 
  theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
        panel.background = element_blank(),axis.line=element_line(colour = "black"))

图片展示

Fig2b
代码重复
1.操作准备流程
代码语言:javascript复制
to_plot_ct <- c("Bcell", "macrophage", "Tcell", "stroma", "endothelial")
mat_short_ct <- mat_ct[, which(pd_ct$cell_types_cl_all %in% to_plot_ct)]
pd_short_ct <- pd_ct[which(pd_ct$cell_types_cl_all %in% to_plot_ct), ]
tsne_short_ct <- Rtsne(t(mat_short_ct), perplexity = 30)
colnames(tsne_short_ct$Y) <- c("col1", "col2")
tsne_short_ct$Y <- as.data.frame(tsne_short_ct$Y)
tsne_short_ct$Y$cell_types_cl_all <- pd_short_ct$cell_types_cl_all
tsne_short_ct$Y$cell_types_markers <- pd_short_ct$cell_types_markers
tsne_short_ct$Y$patient <- pd_short_ct$patient
2.可视化操作
代码语言:javascript复制
ggplot(tsne_short_ct$Y, aes(x =col1, y = col2, color = factor(cell_types_cl_all, levels = names(anno_colors$tsne)), 
                            shape = patient))   
  geom_point(size = 4)   
  labs(col = "cell type", x = "Component 1", y = "Component 2")   
  scale_color_manual(values = anno_colors$tsne) 
  theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
        panel.background = element_blank(),axis.line=element_line(colour = "black"))

图片展示

Fig2c
代码重复
1.操作准备流程
代码语言:javascript复制
to_plot_ct <- c("epithelial")
mat_short_ct <- mat_ct[, which(pd_ct$cell_types_cl_all %in% to_plot_ct)]
pd_short_ct <- pd_ct[which(pd_ct$cell_types_cl_all %in% to_plot_ct), ]
tsne_short_ct <- Rtsne(t(mat_short_ct), perplexity = 30, verbose=TRUE, max_iter = 500)
colnames(tsne_short_ct$Y) <- c("col1", "col2")
tsne_short_ct$Y <- as.data.frame(tsne_short_ct$Y)
tsne_short_ct$Y$cell_types_cl_all <- pd_short_ct$cell_types_cl_all
tsne_short_ct$Y$cell_types_markers <- pd_short_ct$cell_types_markers
tsne_short_ct$Y$patient <- pd_short_ct$patient
2.可视化
代码语言:javascript复制
ggplot(tsne_short_ct$Y, aes(x = col1, y = col2, color = factor(patient, levels = names(anno_colors$patient)), 
                            shape = cell_types_cl_all))   
  geom_point(size = 4)   
  scale_color_manual(values = anno_colors$patient)  
  labs(col = "patient", x = "Component 1", y = "Component 2", shape = "cell type") 
  theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank(),
        panel.background = element_blank(),axis.line=element_line(colour = "black"))

图片展示

Fig2d
代码重复
1.操作准备流程
代码语言:javascript复制
library(here)
tpm.rsem <- read.table("tpm_original.txt", sep = "t")
#读取标准化后的数据
nick_normalize <- read.table("RSEM_TPM_240_NormalCells.txt", sep = "t", header = TRUE)
#读取正常单个乳腺细胞的数据

tpm.cnv <- tpm.rsem[match(intersect(rownames(tpm.rsem), rownames(nick_normalize)), rownames(tpm.rsem)),]#匹配我们现有的基因名称
nick_normalize <- nick_normalize[match(intersect(rownames(tpm.rsem), rownames(nick_normalize)), rownames(nick_normalize)),]
all.equal(rownames(tpm.cnv), rownames(nick_normalize))
#只保留感兴趣的细胞
tpm.cnv <- tpm.cnv[,match(colnames(mat_ct), colnames(tpm.cnv))]


log.tpm.cnv <- log2(tpm.cnv   1)
log.nick_normalize <- log2(nick_normalize   1)
#变换log2 (TPM   1)

log.tpm.cnv <- log.tpm.cnv/10
log.nick_normalize <- log.nick_normalize/10
#除以10来模拟100万的库复杂度

colnames(log.nick_normalize) <- paste("normal", c(1:ncol(log.nick_normalize)), sep = "")
#重命名正常细胞
threshold_to_keep <- 0.1
log.tpm.cnv <- log.tpm.cnv[(apply(log.tpm.cnv, 1, mean) >= threshold_to_keep),]
#确保所有的基因都高表达于临界值

if (length(which(is.na(match(rownames(log.tpm.cnv), rownames(mat_ct))))) == 0)
  fd.cnas <- fd_norm[match(rownames(log.tpm.cnv), rownames(mat_ct)),]
all.equal(rownames(fd.cnas), rownames(log.tpm.cnv))
mappings.cnas <- fd.cnas[,c("ensembl_gene_id", "hgnc_symbol", "chromosome_name", "start_position", "end_position")]
#获取过滤后基因的映射坐标

chr.st.cnas <- paste(mappings.cnas$chromosome_name, mappings.cnas$start_position, sep = " ")
order.map.cnas <- match(gtools::mixedsort(chr.st.cnas), chr.st.cnas)
mappings.cnas <- mappings.cnas[order.map.cnas, ]
if (length(which(duplicated(mappings.cnas))) > 0)
  mappings.cnas <- mappings.cnas[-which(duplicated(mappings.cnas)), ]
mappings.cnas <- as.data.frame(mappings.cnas) %>% dplyr::filter(chromosome_name %in% c(1:24, "X", "Y", "x", "y"))
mappings.cnas$chromosome_name <- revalue(mappings.cnas$chromosome_name, c("X" = "23", "x" = "23", "Y" = "24", "y" = "24"))
rownames(mappings.cnas) <- mappings.cnas$hgnc_symbol
#根据基因在染色体上的位置对基因进行排序

log.tpm.cnv <- log.tpm.cnv[match(mappings.cnas$hgnc_symbol, rownames(log.tpm.cnv)),]
all.equal(rownames(log.tpm.cnv), rownames(mappings.cnas))
#根据映射顺序对TPM数据中的基因进行排序

log.nick_normalize <- log.nick_normalize[match(rownames(log.tpm.cnv), rownames(log.nick_normalize)),]
all.equal(rownames(log.nick_normalize), rownames(log.tpm.cnv))
#将正常数据集限制在相同的基因上

mean.genes <- apply(log.nick_normalize, 1, mean)
#计算正常细胞的平均值

cnv.data <- sweep(log.tpm.cnv, 1, mean.genes)
#从肿瘤数据中去除平均正常表达

cnv.data <- cbind(log.nick_normalize, cnv.data)
#在肿瘤数据中加入正常的上皮细胞

if (length(which(cnv.data > 3) > 0))
  cnv.data <- cnv.data[-which(cnv.data > 3)]
if (length(which(cnv.data < -3) > 0))
  cnv.data <- cnv.data[-which(cnv.data < -3)]
#消除数据中的极值

window.size <- 101
sl.avg <- apply(cnv.data, 2, caTools::runmean, k = window.size)
rownames(sl.avg) <- rownames(cnv.data)
#计算滑动窗口

scaled.sl.avg <- scale(sl.avg, scale = FALSE)
#中心CNA值

clustering_method_columns <- "ward.D"
clustering_distance_columns <- "pearson"
#聚类方法与度量
2.热图注释
代码语言:javascript复制
## 聚集所有上皮细胞作为注释
pd_epith <- readRDS(here::here("data", "pd_epith.RDS"))
clustering_allepith <- pd_epith$Cluster

patients_now <- sort(unique(pd_epith$patient))
clusterings_sep_allepith <- list()
for (i in patients_now) {
  clusterings_sep_allepith[[i]] <- clustering_allepith[which(pd_epith$patient == i)]
  names(clusterings_sep_allepith[[i]]) <- rownames(pd_epith)[which(pd_epith$patient == i)]
}
#将数据分为正常组和上皮组,分别绘制热图
norm_oth_idx <- which(pd_ct$cell_types_cl_all != "epithelial")

mat_normepith_cna <- scaled.sl.avg[,1:ncol(log.nick_normalize)]
scaled.sl.avg <- scaled.sl.avg[,-c(1:ncol(log.nick_normalize))]

all.equal(rownames(pd_ct), colnames(scaled.sl.avg))
mat_normoth_cna <- scaled.sl.avg[,norm_oth_idx]
pd_normoth_ct <- pd_ct[norm_oth_idx,]

mats_ct <- list()
pds_ct <- list()
for (i in 1:length(patients_now)) {
  mats_ct[[i]] <- mat_ct[,pd_ct$patient == patients_now[i]]
  pds_ct[[i]] <- pd_ct[pd_ct$patient == patients_now[i],]
}
names(mats_ct) <- patients_now
names(pds_ct) <- patients_now


mats_epith_cna <- list()
pds_epith_ct <- list()
for (i in 1:length(patients_now)) {
  mats_epith_cna[[i]] <- scaled.sl.avg[,intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[i]))]
  pds_epith_ct[[i]] <- pds_ct[[i]][which(pds_ct[[i]]$cell_types_cl_all == "epithelial"),]
}
names(mats_epith_cna) <- patients_now
names(pds_epith_ct) <- patients_now


#正常上皮细胞的热图注释
ha_cnas_normepith <- HeatmapAnnotation(df=data.frame(celltype = "normal"), 
                                       col = list(celltype = c("normal" = "violet")),
                                       show_annotation_name = TRUE,
                                       annotation_name_side = "left", 
                                       annotation_name_gp = gpar(fontsize = 8),
                                       annotation_legend_param = list(list(title_position = "topcenter",
                                                                           title = "cell type")),
                                       show_legend = TRUE,
                                       gap = unit(c(2), "mm"))

#热图注释的所有上皮细胞与单独的集群
ha_cnas_epith_sep <- list()
for (i in 1:length(patients_now)) {
  
  if (i == 1)
    ha_cnas_epith_sep[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]),
                                                col = list(cluster_all = c("1" = "#ee204d", "2" = "#17806d", "3" = "#b2ec5d", "4" = "#cda4de", "5" = "#1974d2")),
                                                annotation_name_side = "left", annotation_name_gp = gpar(fontsize = 12),
                                                annotation_legend_param = list(list(title_position = "topcenter", title = c(paste("clust_all")))),
                                                show_annotation_name = FALSE,
                                                gap = unit(c(2), "mm"),
                                                show_legend = FALSE)
  
  if (i > 1 && i != 5)
    ha_cnas_epith_sep[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]),
                                                col = list(cluster_all = c("1" = "#ee204d", "2" = "#17806d", "3" = "#b2ec5d", "4" = "#cda4de", "5" = "#1974d2")),
                                                annotation_name_side = "left", annotation_name_gp = gpar(fontsize = 12),
                                                annotation_legend_param = list(list(title_position = "topcenter", title = c(paste("clust_all")))),
                                                show_annotation_name = FALSE,
                                                gap = unit(c(2), "mm"),
                                                show_legend = FALSE)
  
  if (i == 5)
    ha_cnas_epith_sep[[i]] <- HeatmapAnnotation(df=data.frame(cluster_all = clusterings_sep_allepith[[i]]),
                                                col = list(cluster_all = c("1" = "#ee204d", "2" = "#17806d", "3" = "#b2ec5d", "4" = "#cda4de", "5" = "#1974d2")),
                                                annotation_name_side = "right", annotation_name_gp = gpar(fontsize = 12),
                                                annotation_legend_param = list(list(title_position = "topcenter", title = c(paste("clust_all")))),
                                                show_annotation_name = FALSE,
                                                gap = unit(c(2), "mm"),
                                                show_legend = TRUE)
}
#对正常人和每个上皮细胞患者分别绘制热图
all.equal(rownames(mats_epith_cna[[1]]), mappings.cnas$hgnc_symbol)

splits_chroms <- as.factor(mappings.cnas$chromosome_name)
splits_chroms <- factor(splits_chroms, levels(splits_chroms)[c(1, 12, 18:24, 2:11, 13:17)])

cols_chroms <- rep(c("black", "gray"), 12)
names(cols_chroms) <- c(1:24)

ha_rows_chroms <- HeatmapAnnotation(df = data.frame(chromosome = mappings.cnas$chromosome_name),
                                    annotation_legend_param = list(chromosome = list(ncol = 2, title = "chromosome", title_position = "topcenter")),
                                    which = "row", col = list("chromosome" = cols_chroms), annotation_width = unit(3, "mm"))
3.可视化
代码语言:javascript复制
ht_cnas_chroms_high <- ha_rows_chroms   
  Heatmap(mat_normepith_cna, 
          col = colorRamp2(c(-0.7, 0, 0.7), c("blue", "white", "red")),
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE, show_row_names = FALSE, 
          name = "norm epith", 
          clustering_distance_columns = clustering_distance_columns, clustering_method_columns = clustering_method_columns,
          split = splits_chroms, 
          column_title = "Normal", column_title_gp = gpar(fontsize = 10),
          show_heatmap_legend =F,
          gap = unit(1, "mm"),
          use_raster = TRUE)   
  Heatmap(mats_epith_cna[[1]],
          col = colorRamp2(c(-0.7, 0, 0.7), c("blue", "white", "red")), 
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE, show_row_names = FALSE, 
          name = patients_now[1], 
          clustering_distance_columns = clustering_distance_columns, clustering_method_columns = clustering_method_columns,
          column_title = patients_now[1], column_title_gp = gpar(fontsize = 10),
          show_heatmap_legend = F,
          gap = unit(1, "mm"),
          use_raster = TRUE)   
  Heatmap(mats_epith_cna[[2]],
          col = colorRamp2(c(-0.7, 0, 0.7), c("blue", "white", "red")),
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE, show_row_names = FALSE, 
          name = patients_now[2], 
          clustering_distance_columns = clustering_distance_columns, clustering_method_columns = clustering_method_columns,
          column_title = patients_now[2], column_title_gp = gpar(fontsize = 10),
          show_heatmap_legend =F,
          gap = unit(1, "mm"),
          use_raster = TRUE)  
  Heatmap(mats_epith_cna[[3]],
          col = colorRamp2(c(-0.7, 0, 0.7), c("blue", "white", "red")),
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE, show_row_names = FALSE, 
          name = patients_now[3], 
          clustering_distance_columns = clustering_distance_columns, clustering_method_columns = clustering_method_columns,
          column_title = patients_now[3], column_title_gp = gpar(fontsize = 10),
          show_heatmap_legend = F,
          gap = unit(1, "mm"),
          use_raster = TRUE)  
  Heatmap(mats_epith_cna[[4]],
          col = colorRamp2(c(-0.7, 0, 0.7), c("blue", "white", "red")),
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE, show_row_names = FALSE, 
          name = patients_now[4],
          clustering_distance_columns = clustering_distance_columns, clustering_method_columns = clustering_method_columns,
          column_title = patients_now[4], column_title_gp = gpar(fontsize = 10),
          show_heatmap_legend = F, 
          gap = unit(1, "mm"),
          use_raster = TRUE)  
  Heatmap(mats_epith_cna[[5]],
          col = colorRamp2(c(-0.7, 0, 0.7), c("blue", "white", "red")),
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE, show_row_names = FALSE, 
          name = patients_now[5], 
          clustering_distance_columns = clustering_distance_columns, clustering_method_columns = clustering_method_columns,
          column_title = patients_now[5], column_title_gp = gpar(fontsize = 10),
          show_heatmap_legend = F, 
          gap = unit(1, "mm"),
          use_raster = TRUE)  
  Heatmap(mats_epith_cna[[6]],
          col = colorRamp2(c(-0.7, 0, 0.7), c("blue", "white", "red")),
          cluster_rows = FALSE, show_column_dend = TRUE, show_column_names = FALSE, show_row_names = FALSE, 
          name = patients_now[6],
          clustering_distance_columns = clustering_distance_columns, clustering_method_columns = clustering_method_columns,
          column_title = patients_now[6], column_title_gp = gpar(fontsize = 10),
          heatmap_legend_param = list(title = "Expression score", title_position  = "topleft", color_bar = "continuous",
                                      title_gp = gpar(fontsize = 8), labels_gp = gpar(fontsize = 8)),
          show_heatmap_legend = TRUE, 
          gap = unit(1, "mm"),
          use_raster = TRUE)
ht_cnas_chroms_high 

图片展示

Fig2f
代码重复
1.操作准备流程
代码语言:javascript复制
mappings <- readRDS("mappings.RDS") #基因信息
qc <- read.table("qc_original.txt", sep = "t", stringsAsFactors = FALSE) # 质量控制
#从Nick Navin那里读取正常的单细胞乳腺数据
nick_normalize <- read.table("RSEM_TPM_240_NormalCells.txt", sep = "t", header = TRUE)
#把我们已知的基因名配对
mat_ct <- mat_ct[match(intersect(rownames(nick_normalize), rownames(mat_ct)), rownames(mat_ct)),]
nick_normalize <- nick_normalize[match(intersect(rownames(nick_normalize), rownames(mat_ct)), rownames(nick_normalize)),]
all.equal(rownames(mat_ct), rownames(nick_normalize))
mappings <- mappings[match(rownames(mat_ct), mappings$hgnc_symbol),]
all.equal(mappings$hgnc_symbol, rownames(mat_ct))
#正常细胞重命名
colnames(nick_normalize) <- paste("normal", c(1:ncol(nick_normalize)), sep = "")
2.处理输入文件
代码语言:javascript复制
#正常细胞质量控制
qc_normals <- matrix(NA, nrow = ncol(nick_normalize), ncol = ncol(qc))
qc_normals <- as.data.frame(qc_normals)
colnames(qc_normals) <- colnames(qc)
qc_normals$Sample <- colnames(nick_normalize)
qc_normals$experiment <- "Exp4"
qc_normals$pool_H12 <- 0
rownames(qc_normals) <- colnames(nick_normalize)
#sceset_norm初始化
fd_norm <- new("AnnotatedDataFrame", data = as.data.frame(mappings))
rownames(fd_norm) <- fd_norm$hgnc_symbol
pd_norm <- new("AnnotatedDataFrame", data = as.data.frame(qc_normals))
rownames(pd_norm) <- rownames(qc_normals)
sceset_norm <- SingleCellExperiment(assays = list(tpm = as.matrix(nick_normalize)), 
                                    rowData = as.data.frame(mappings),
                                    colData = as.data.frame(qc_normals))
# monocle TPM对象(转换为相对计数)
min_thresh_tpm <- 1
HSMM_norm <- newCellDataSet(tpm(sceset_norm), 
                            phenoData = pd_norm, 
                            featureData = fd_norm, 
                            expressionFamily = tobit(), lowerDetectionLimit = min_thresh_tpm)
HSMM_norm <- detectGenes(HSMM_norm, min_expr = min_thresh_tpm)
# monocle 添加相关性矩阵
rpc_matrix_norm <- relative2abs(HSMM_norm)
min_thresh_log_tpm <- 0.1
HSMM_norm <- newCellDataSet(as(as.matrix(rpc_matrix_norm), "sparseMatrix"),
                            phenoData = phenoData(HSMM_norm),
                            featureData = featureData(HSMM_norm),
                            lowerDetectionLimit = min_thresh_log_tpm,
                            expressionFamily = negbinomial.size())
HSMM_norm <- estimateSizeFactors(HSMM_norm)
HSMM_norm <- estimateDispersions(HSMM_norm)
#sceset_norm添加其他的表达信息
assay(sceset_norm, "exprs") <- log2(tpm(sceset_norm)   1)
assay(sceset_norm, "monocle") <- as.matrix(exprs(HSMM_norm))
assay(sceset_norm, "log2_tpm") <- log2(tpm(sceset_norm)   1)
assay(sceset_norm, "log2_monocle") <- as.matrix(log2(exprs(HSMM_norm)   1))
assay(sceset_norm, "log2_tpm_median") <- scale(assays(sceset_norm)$exprs, scale = FALSE)
exprs_filtered <- t(t(exprs(HSMM_norm)/pData(HSMM_norm)$Size_Factor))
nz_genes <- which(exprs_filtered != 0)
exprs_filtered[nz_genes] <- log2(exprs_filtered[nz_genes]   1)
assay(sceset_norm, "norm_log2_monocle") <- as.matrix(exprs_filtered)
rm(exprs_filtered)
rm(nz_genes)
#对Monocle counts进行标准化
sceset_norm_mon <- sceset_norm
counts(sceset_norm_mon) <- assays(sceset_norm)$monocle
sceset_norm_mon <- computeSumFactors(sceset_norm_mon, positive = TRUE)
if (length(which(sizeFactors(sceset_norm_mon) == 0)) > 0) {
  sceset_norm_mon <- sceset_norm_mon[,-which(sizeFactors(sceset_norm_mon) == 0)]
  set_exprs(sceset_norm_mon, "log2_tpm_median") <- scale(assays(sceset_norm_mon)$exprs, scale = FALSE)
}
summary(sizeFactors(sceset_norm_mon))
sceset_norm_mon <- scater::logNormCounts(sceset_norm_mon)
#消除不必要的变异的额外来源
housekeepers <- read.table( "housekeepers.txt", sep = "t")
colnames(housekeepers) <- "gene"
housekeepers <- unique((housekeepers))
housekeepers$index <- match(housekeepers$gene, rowData(sceset_norm_mon)$hgnc_symbol)
if (length(which(is.na(housekeepers$index)) > 0))
  housekeepers <- housekeepers[-which(is.na(housekeepers$index)), ]
ruvg1_norm_mon_scran <- RUVg(exprs(sceset_norm_mon), housekeepers$index, k = 1, isLog = 1)
assay(sceset_norm_mon, "ruvg1_mon_scran") <- ruvg1_norm_mon_scran$normalizedCounts
#合并两个矩阵(normals和TNBC)
mat_norm <- assays(sceset_norm_mon)$ruvg1_mon_scran
mat_complete <- cbind(mat_norm, mat_ct)
mat_complete 
pd_ct <- readRDS("pd_ct.RDS")
mat_ct 
order_samples_cnv <- readRDS("order_samples_cnv.RDS")
pd_epith <- readRDS("pd_epith.RDS")
patients_now <- sort(unique(pd_ct$patient))
#按患者上皮细胞排列,与二维拷贝数图相同的顺序,并在顶部添加正常细胞
ord_ct <- c(intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[1]))[order_samples_cnv[[3]]],
            intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[2]))[order_samples_cnv[[4]]],
            intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[3]))[order_samples_cnv[[5]]],
            intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[4]))[order_samples_cnv[[6]]],
            intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[5]))[order_samples_cnv[[7]]],
            intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[6]))[order_samples_cnv[[8]]])
ord_norm <- order_samples_cnv[[2]]
ord_ct <- ord_ct   length(ord_norm)
ord_ct <- c(ord_norm, ord_ct)
mat_cor <- mat_complete[, ord_ct]
ct_ord_per_pat <- pd_ct$cell_types_cl_all
ct_ord_per_pat[intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[1]))][order_samples_cnv[[3]]] <- paste("epith", patients_now[1], sep = "")
ct_ord_per_pat[intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[2]))][order_samples_cnv[[4]]] <- paste("epith", patients_now[2], sep = "")
ct_ord_per_pat[intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[3]))][order_samples_cnv[[5]]] <- paste("epith", patients_now[3], sep = "")
ct_ord_per_pat[intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[4]))][order_samples_cnv[[6]]] <- paste("epith", patients_now[4], sep = "")
ct_ord_per_pat[intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[5]))][order_samples_cnv[[7]]] <- paste("epith", patients_now[5], sep = "")
ct_ord_per_pat[intersect(which(pd_ct$cell_types_cl_all == "epithelial"), which(pd_ct$patient == patients_now[6]))][order_samples_cnv[[8]]] <- paste("epith", patients_now[6], sep = "")
ct_ord_per_pat <- c(rep("normal", length(ord_norm)), ct_ord_per_pat) 
ct_ord <- ct_ord_per_pat[ord_ct]
# 计算关联映射
cors_ct <- cor(mat_cor, method = "pearson")
3.注释信息
代码语言:javascript复制
epithelial_col <- brocolors("crayons")["Maroon"]
basal_epithelial_col <- brocolors("crayons")["Red"]
luminal_epithelial_col <- brocolors("crayons")["Sunset Orange"]
luminal_progenitor_col <- brocolors("crayons")["Salmon"]

stroma_col <- brocolors("crayons")["Aquamarine"]
endothelial_col <- brocolors("crayons")["Wisteria"]

PTPRC_col <- brocolors("crayons")["Inchworm"]
t_cell_col <- brocolors("crayons")["Screamin' Green"]
b_cell_col <- brocolors("crayons")["Fern"]
macrophage_col <- brocolors("crayons")["Tropical Rain Forest"]

tsne_cols <- c("epithelial" = unname(basal_epithelial_col), "stroma" = unname(stroma_col), "endothelial" = unname(endothelial_col),
               "Tcell" = unname(t_cell_col), "Bcell" = unname(b_cell_col), "macrophage" = unname(macrophage_col))

pats_cols <- c("PT039" = unname(brocolors("crayons")["Orange Red"]), "PT058" = unname(brocolors("crayons")["Orange"]), 
               "PT081" = unname(brocolors("crayons")["Pink Flamingo"]), "PT084" = unname(brocolors("crayons")["Fern"]), 
               "PT089" = unname(brocolors("crayons")["Blue Violet"]), "PT126" = unname(brocolors("crayons")["Sky Blue"]))

anno_colors <- list("tsne" = tsne_cols, "patient" = pats_cols)

cols_ct_pats <- c(anno_colors$tsne, anno_colors$patient)
names(cols_ct_pats)[(length(cols_ct_pats) - length(patients_now)   1):length(cols_ct_pats)] <- paste("epith",patients_now, sep = "")
cols_ct_pats <- c("normal" = "gray", cols_ct_pats)
if (length((which(names(cols_ct_pats) == "epithelial")) > 0))
  cols_ct_pats <- cols_ct_pats[-(which(names(cols_ct_pats) == "epithelial"))]
cols_ct_pats <- cols_ct_pats[match(c("normal", "stroma", "Tcell", "Bcell", "macrophage", "endothelial", paste("epith", patients_now, sep = "")), names(cols_ct_pats))]


#簇的注释
cluster_order_allepith <- pd_epith$Cluster[match(colnames(mat_cor)[(length(ord_norm)   1):length(colnames(mat_cor))], pd_epith$Sample)]

ha_cors_top <- HeatmapAnnotation(df=data.frame(celltype = ct_ord), 
                                 col = list(celltype = cols_ct_pats),
                                 show_annotation_name = FALSE,
                                 annotation_legend_param = list(list(title_position = "topcenter", title = "Epithelial cells")),
                                 show_legend = TRUE,
                                 gap = unit(c(2), "mm"))

ha_cors_rows <- HeatmapAnnotation(df=data.frame(celltype = ct_ord), 
                                  col = list(celltype = cols_ct_pats),
                                  show_annotation_name = FALSE,
                                  annotation_legend_param = list(list(title_position = "left", title = "Epithelial cells")),
                                  show_legend = FALSE,
                                  which = "row",
                                  gap = unit(c(2), "mm"))
4.可视化
代码语言:javascript复制
ht_cors <- ha_cors_rows   
  Heatmap(cors_ct, 
          name = "Spearman correlation",
          cluster_columns = FALSE,
          cluster_rows = FALSE,
          show_row_names = FALSE,
          show_column_names = FALSE,
          top_annotation = ha_cors_top,
          col = colorRamp2(c(0, 0.2, 0.3, 0.4), c("lightgray", "pink", "mediumorchid3", "purple")),
          use_raster = TRUE)
ht_cors

图片展示

全部的代码和数据,都可以在我们《生信技能树》公众号后台回复“tnbc”获取

未完待续……

0 人点赞