单细胞数据复现-肺癌文章代码复现9

2022-07-11 12:15:26 浏览数 (1)

单细胞数据复现-肺癌文章代码复现1https://cloud.tencent.com/developer/article/1992648

单细胞数据复现-肺癌文章代码复现2https://cloud.tencent.com/developer/article/1995619

单细胞数据复现-肺癌文章代码复现3https://cloud.tencent.com/developer/article/1996043

单细胞数据复现-肺癌文章代码复现4https://cloud.tencent.com/developer/article/2006654

单细胞数据复现-肺癌文章代码复现5https://cloud.tencent.com/developer/article/2008487

单细胞数据复现-肺癌文章代码复现6https://cloud.tencent.com/developer/article/2008704

单细胞数据复现-肺癌文章代码复现7https://cloud.tencent.com/developer/article/2019634

单细胞数据复现-肺癌文章代码复现8https://cloud.tencent.com/developer/article/2026259

细胞比例计算

代码语言:javascript复制
###cell proportion statistics

test <- cell_counts_rel %>%
  mutate(pattern = ifelse(patient_id %in% c("p032", "p018", "p019", "p024", "p031", "p033"), "N3MC", "CP2E")) %>%
  mutate(T_CD8_exhausted = T_CD8_1   T_CD8_2)

#for Kim et al. dataset
#test <- cell_counts_rel %>%
#  mutate(pattern = ifelse(patient_id %in% c("P0009", "P0018", "P0020", "P0028"), "CP2E", "N3MC")) %>%
#  mutate(T_CD8_exhausted = T_CD8_1   T_CD8_2)

ggplot(test, aes(x = pattern, y = CD14_Macrophages1))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = CD14_Macrophages1~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_CD14_Macrophages1.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = CD14_Macrophages2))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = CD14_Macrophages2~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_CD14_Macrophages2.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = Myeloid_Dendritic))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = Myeloid_Dendritic~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_Myeloid_Dendritic.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = Plasmacytoid_Dendritic))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = Plasmacytoid_Dendritic~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_Plasmacytoid_Dendritic.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = Myofibroblast1))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = Myofibroblast1~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_Myofibroblast1.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = Myofibroblast2))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = Myofibroblast2~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_Myofibroblast2.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = T_conv1))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = T_conv1~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_T_conv1.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = NK_cells))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = NK_cells~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_NK_cells.pdf", path = "../results", width = 10, height = 10, units = "cm")

ggplot(test, aes(x = pattern, y = T_CD8_exhausted))  
  geom_boxplot()  
  ggtitle(paste0("p = ", wilcox.test(formula = T_CD8_exhausted~pattern, data = test, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig8A_Group1vs2_T_CD8_exhausted.pdf", path = "../results", width = 10, height = 10, units = "cm")
图片.png图片.png

拼图的时候发现我又缺了一张图,天天丢来丢去的我,不过其他的图跟这个代码是类似的,所以基本上按照上面的代码,结果还是可以出来的。

这次的内容紧接着上次的复现7的内容。

代码语言:javascript复制
###select only cell types that occured in at least three patients

celltype_selected <- cell_counts_rel
celltype_selected[celltype_selected == 0] <- NA
celltype_selected <- cell_counts_rel[, which(colMeans(!is.na(celltype_selected)) >= 0.3)]

cell_counts_rel_selected <- cell_counts_rel[, colnames(cell_counts_rel) %in% colnames(celltype_selected)]

cell_counts_rel_mtrx <- as.matrix(cell_counts_rel_selected[,-1])
rownames(cell_counts_rel_mtrx) <- cell_counts_rel_selected$patient_id

下面是开始以前的PCA分析。

代码语言:javascript复制
###principal component analysis

cell_counts_pca <- as.data.frame(scores(pca(cell_counts_rel_mtrx, nPcs = 10)))
cell_counts_pca$patient_id <- rownames(cell_counts_pca)


histo_data <- unique(FetchData(epi_tumor, c("patient_id", "histo_subtype")))
cell_counts_pca <- full_join(cell_counts_pca, histo_data, by = "patient_id")

ggplot(cell_counts_pca, aes(x = PC1, y = PC2, color = histo_subtype, label = patient_id))  
  geom_point(size = 2)  
  geom_text(aes(label = patient_id), color = "black", vjust = 1.5, size = 2)  
  scale_color_manual(values = use_colors)  
  scale_x_continuous(limits = c(-1, 1.2))  
  scale_y_continuous(limits = c(-1.2, 0.8))
ggsave2("Fig5A.pdf", path = "../results", width = 15, height = 9, units = "cm")


###add mean tumor cell signature score

epi_pca <- AddModuleScore(epi_pca, features = list(c("SCGB3A1", "PIGR", "NAPSA", "C4BPA", "SCGB3A2", "HLA-DRA", "CD74", "ADGRF5", "C16orf89", "FOLR1", "SELENBP1", "HLA-DRB1", "ID4", "MGP", "AQP3", "CA2", "LHX9", "HLA-DPB1", "FMO5", "GKN2", "C5", "MUC1", "NPC2", "RNASE1", "PIK3C2G", "SFTA2", "SLC34A2", "HLA-DPA1", "FGFR3", "PGC")), name = "tumor_signature")
epi_pca <- AddModuleScore(epi_pca, features = list(c("DSG2", "CAMK2N1", "FAM3C", "KRT7", "IFI27", "SLC2A1", "MARCKS", "PLAU", "AHNAK2", "PERP", "S100A4", "KRT19", "COL6A1", "UACA", "COL17A1", "CDA", "TPM2", "S100A16", "KRT8", "PRSS23", "DST", "LAMC2", "S100P", "PRSS3", "LAMA3", "DSP", "ITGA3", "MDK", "FAM83A", "ITGB4")), name = "anti_tumor_signature")

epi_tumor_data <- FetchData(epi_pca, c(colnames(epi_pca@meta.data)))

epi_tumor_signature <- epi_tumor_data %>%
  group_by(patient_id) %>% 
  mutate(diff_mean = mean(tumor_signature1)) %>%
  mutate(undiff_mean = mean(anti_tumor_signature1)) %>%
  dplyr::select(patient_id, diff_mean, undiff_mean) %>%
  unique()

###heatmap of normalized immune and stromal cell counts, and mean tumor cell signatures

cell_counts_rel_heatmap <- full_join(cell_counts_rel_selected, cell_counts_pca, by = "patient_id")
cell_counts_rel_heatmap <- full_join(cell_counts_rel_heatmap, epi_tumor_signature, by = "patient_id")

cell_counts_rel_heatmap_ordered <- cell_counts_rel_heatmap[order(cell_counts_rel_heatmap$PC1),]

##R语言中的ncol()函数用于返回指定矩阵的列数
cell_counts_rel_heatmap1 <- cell_counts_rel_heatmap_ordered[,2:ncol(celltype_selected)]
cell_counts_rel_heatmap2 <- cell_counts_rel_heatmap_ordered[,c("diff_mean", "undiff_mean")]

rownames(cell_counts_rel_heatmap1) <- cell_counts_rel_heatmap_ordered$patient_id
rownames(cell_counts_rel_heatmap2) <- cell_counts_rel_heatmap_ordered$patient_id

##scale()函数标准化时使用的样本标准分的计算方法
cell_counts_rel_heatmap1 <- scale(cell_counts_rel_heatmap1)

heatmap.2(as.matrix(t(cell_counts_rel_heatmap1)), trace = "none", density.info = "none", scale = "none", symbreaks = F, col = hcl.colors(100, palette = "orrd", rev = T), Rowv = T, Colv = F, dendrogram = "row", margins = c(5,10))
ggsave("Fig5B_immune_and_stromal_cells.pdf", width = 7, height = 7)

heatmap.2(as.matrix(t(cell_counts_rel_heatmap2)), trace = "none", density.info = "none", scale = "none", col = bluered(100), Rowv = F, Colv = F, dendrogram = "none")
ggsave("Fig5B_tumor_cell_signatures.pdf", width = 7, height = 5)

相关性图绘制

代码语言:javascript复制
###correlation plots
##可以用于后续画图的比较***
# matrix of the p-value of the correlation (from http://www.sthda.com/english/wiki/visualize-correlation-matrix-using-correlogram)
cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat<- matrix(NA, n, n)
  diag(p.mat) <- 0
  for (i in 1:(n - 1)) {
    for (j in (i   1):n) {
      tmp <- cor.test(mat[, i], mat[, j], method = "spearman", ...)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}

p.mat <- cor.mtest(cell_counts_rel_mtrx)

test <- cor(cell_counts_rel_mtrx, method = "spearman")

png("SuppFig8A.png", width = 30, height = 30, units = "cm", res = 600)
corrplot(test, p.mat = p.mat, sig.level = 0.05, insig = "blank", type = "full", tl.col = "black")
dev.off()

#pdf("output/fig5/corrplot.pdf", width = 7, height = 7)
#corrplot(test, p.mat = p.mat, sig.level = 0.05, insig = "blank", type = "full", tl.col = "black")
#dev.off()

### correlation network plot
##后续可以用***
test_net <- test

for (i in 1:nrow(test_net)){
  for (j in 1:ncol(test_net)){
    if(p.mat[i,j] > 0.05){
      test_net[i,j] <- 0
    }
    if(test_net[i,j] == 1){
      test_net[i,j] <- 0
    }
  }
}

library(qgraph)

pdf("Fig5C.pdf", width = 6, height = 6)
qgraph(test_net, layout = "spring", threshold = 0.7, labels = colnames(test_net), label.scale = F, label.cex = 0.7, theme = "colorblind", vsize = 3, color = "grey", borders = F)
dev.off()


#####scatter plots

cell_counts_rel <- mutate(cell_counts_rel, T_CD8_exhausted = T_CD8_1 T_CD8_2)

###plots stromal

ggplot(cell_counts_rel, aes(x = Myofibroblast1, y = Myofibroblast2))  
  geom_point(aes(color = patient_id), size = 4)  
  scale_x_continuous(limits = c(0,0.8))  
  scale_y_continuous(limits = c(0,0.8))  
  scale_color_manual(values = use_colors)  
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black")  
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$Myofibroblast1, cell_counts_rel$Myofibroblast2, method = "spearman")$p.value, 
                 "nr = ",cor.test(cell_counts_rel$Myofibroblast1, cell_counts_rel$Myofibroblast2, method = "spearman")$estimate))
ggsave2("Fig3E.pdf", path = "../results", width = 10, height = 9, units = "cm")


###plots immune

ggplot(cell_counts_rel, aes(x = CD14_Macrophages1, y = CD14_Macrophages2))  
  geom_point(aes(color = patient_id), size = 4)  
  scale_x_continuous(limits = c(0,0.6))  
  scale_y_continuous(limits = c(0,0.8))  
  scale_color_manual(values = use_colors)  
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black")  
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$CD14_Macrophages1, cell_counts_rel$CD14_Macrophages2, method = "spearman")$p.value, 
                 "nr = ",cor.test(cell_counts_rel$CD14_Macrophages1, cell_counts_rel$CD14_Macrophages2, method = "spearman")$estimate))
ggsave2("Fig4D_Macrophages1_2.pdf", path = "../results", width = 10, height = 9, units = "cm")

ggplot(cell_counts_rel, aes(x = CD14_Macrophages2, y = Plasmacytoid_Dendritic))  
  geom_point(aes(color = patient_id), size = 4)  
  scale_x_continuous(limits = c(0,0.8))  
  scale_y_continuous(limits = c(0,0.06))  
  scale_color_manual(values = use_colors)  
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black")  
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$CD14_Macrophages2, cell_counts_rel$Plasmacytoid_Dendritic, method = "spearman")$p.value, 
                 "nr = ",cor.test(cell_counts_rel$CD14_Macrophages2, cell_counts_rel$Plasmacytoid_Dendritic, method = "spearman")$estimate))
ggsave2("Fig4D_Macrophages2_pDC.pdf", path = "../results", width = 10, height = 9, units = "cm")

ggplot(cell_counts_rel, aes(x = T_reg, y = T_CD8_exhausted))  
  geom_point(aes(color = patient_id), size = 4)  
  scale_x_continuous(limits = c(0,0.5))  
  scale_y_continuous(limits = c(0,0.85))  
  scale_color_manual(values = use_colors)  
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black")  
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$T_reg, cell_counts_rel$T_CD8_exhausted, method = "spearman")$p.value, 
                 "nr = ",cor.test(cell_counts_rel$T_reg, cell_counts_rel$T_CD8_exhausted, method = "spearman")$estimate))
ggsave2("Fig4G_T_reg_exhausted.pdf",  width = 10, height = 9, units = "cm")

ggplot(cell_counts_rel, aes(x = CD14_Macrophages2, y = T_CD8_exhausted))  
  geom_point(aes(color = patient_id), size = 4)  
  scale_x_continuous(limits = c(0,0.8))  
  scale_y_continuous(limits = c(0,0.85))  
  scale_color_manual(values = use_colors)  
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black")  
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$CD14_Macrophages2, cell_counts_rel$T_CD8_exhausted, method = "spearman")$p.value, 
                 "nr = ",cor.test(cell_counts_rel$CD14_Macrophages2, cell_counts_rel$T_CD8_exhausted, method = "spearman")$estimate))
ggsave2("Fig4G_Macrophages_exhausted.pdf", width = 10, height = 9, units = "cm")

ggplot(cell_counts_rel, aes(x = Plasmacytoid_Dendritic, y = T_CD8_exhausted))  
  geom_point(aes(color = patient_id), size = 4)  
  scale_x_continuous(limits = c(0,0.06))  
  scale_y_continuous(limits = c(0,0.85))  
  scale_color_manual(values = use_colors)  
  geom_smooth(method = "lm", se = FALSE, size = 1, color = "black")  
  ggtitle(paste0("p = ",cor.test(cell_counts_rel$Plasmacytoid_Dendritic, cell_counts_rel$T_CD8_exhausted, method = "spearman")$p.value, 
                 "nr = ",cor.test(cell_counts_rel$Plasmacytoid_Dendritic, cell_counts_rel$T_CD8_exhausted, method = "spearman")$estimate))
ggsave2("Fig4G_pDC_exhausted.pdf",  width = 10, height = 9, units = "cm")

compare cell counts with IHC quantification

代码语言:javascript复制
###compare cell counts with IHC quantification

CD123 <- read.table("Quantification_CD123.txt")
CD123_scrnaseq <- cell_counts_rel %>% select(Plasmacytoid_Dendritic)
CD123 <- full_join(CD123, CD123_scrnaseq, by = "patient_id")
CD123 <- pivot_longer(CD123, cols = starts_with("image"), names_to = "image_nr", values_to = "count")
CD123_average <- CD123 %>% group_by(patient_id) %>% mutate(mean = mean(count)) %>% mutate(sd = sd(count)) %>% select(c(patient_id, Plasmacytoid_Dendritic, mean, sd)) %>% unique()
ggplot()  
  geom_jitter(data = CD123, mapping = aes(x = Plasmacytoid_Dendritic, y = count, color = patient_id), size = 0.5, alpha = 0.3)  
  geom_smooth(data = CD123_average, mapping = aes(x = Plasmacytoid_Dendritic, y = mean, color = patient_id), method = "lm", se = FALSE, size = 1, color = "black")  
  geom_errorbar(data = CD123_average, aes(x = Plasmacytoid_Dendritic, ymin=mean-sd, ymax=mean sd, color = patient_id), size = 0.5, width = 0.002)  
  geom_point(data = CD123_average, aes(x = Plasmacytoid_Dendritic, y=mean, color = patient_id), shape=15, size = 3)  
  scale_color_manual(values = use_colors)  
  ggtitle(paste0("p = ",cor.test(CD123_average$mean, CD123_average$Plasmacytoid_Dendritic, method = "pearson")$p.value, 
                 "nr = ",cor.test(CD123_average$mean, CD123_average$Plasmacytoid_Dendritic, method = "pearson")$estimate))
ggsave2("Fig4E_pDC_CD123.pdf", path = "../results", width = 10, height = 9, units = "cm")

CTHRC1 <- read_excel("Quantification_CTHRC1.xlsx")
CTHRC1_scrnaseq <- cell_counts_rel %>% select(Myofibroblast2)
CTHRC1 <- full_join(CTHRC1, CTHRC1_scrnaseq, by = "patient_id")
CTHRC1 <- pivot_longer(CTHRC1, cols = starts_with("image"), names_to = "image_nr", values_to = "count")
CTHRC1_average <- CTHRC1 %>% group_by(patient_id) %>% mutate(mean = mean(count)) %>% mutate(sd = sd(count)) %>% select(c(patient_id, Myofibroblast2, mean, sd)) %>% unique()
ggplot()  
  geom_jitter(data = CTHRC1, mapping = aes(x = Myofibroblast2, y = count, color = patient_id), size = 0.5, alpha = 0.3)  
  geom_smooth(data = CTHRC1_average, mapping = aes(x = Myofibroblast2, y = mean, color = patient_id), method = "lm", se = FALSE, size = 1, color = "black")  
  geom_errorbar(data = CTHRC1_average, aes(x = Myofibroblast2, ymin=mean-sd, ymax=mean sd, color = patient_id), size = 0.5)  
  geom_point(data = CTHRC1_average, aes(x = Myofibroblast2, y=mean, color = patient_id), shape=15, size = 3)  
  scale_color_manual(values = use_colors)  
  ggtitle(paste0("p = ",cor.test(CTHRC1_average$mean, CTHRC1_average$Myofibroblast2, method = "pearson")$p.value, 
                 "nr = ",cor.test(CTHRC1_average$mean, CTHRC1_average$Myofibroblast2, method = "pearson")$estimate))
ggsave2("Fig3F_Myofibroblast2_CTHRC1.pdf", path = "../results", width = 10, height = 9, units = "cm")


CXCL9 <- read_excel("Quantification_CXCL9.xlsx")
CXCL9_scrnaseq <- cell_counts_rel %>% select(CD14_Macrophages2)
CXCL9 <- full_join(CXCL9, CXCL9_scrnaseq, by = "patient_id")
CXCL9 <- pivot_longer(CXCL9, cols = starts_with("image"), names_to = "image_nr", values_to = "count")
CXCL9_average <- CXCL9 %>% group_by(patient_id) %>% mutate(mean = mean(count)) %>% mutate(sd = sd(count)) %>% select(c(patient_id, CD14_Macrophages2, mean, sd)) %>% unique()
ggplot()  
  geom_jitter(data = CXCL9, mapping = aes(x = CD14_Macrophages2, y = count, color = patient_id), size = 0.5, alpha = 0.3, width = 0.02)  
  geom_smooth(data = CXCL9_average, mapping = aes(x = CD14_Macrophages2, y = mean, color = patient_id), method = "lm", se = FALSE, size = 1, color = "black")  
  geom_errorbar(data = CXCL9_average, aes(x = CD14_Macrophages2, ymin=mean-sd, ymax=mean sd, color = patient_id), size = 0.5, width = 0.03)  
  geom_point(data = CXCL9_average, aes(x = CD14_Macrophages2, y=mean, color = patient_id), shape=15, size = 3)  
  scale_color_manual(values = use_colors)  
  ggtitle(paste0("p = ",cor.test(CXCL9_average$mean, CXCL9_average$CD14_Macrophages2, method = "pearson")$p.value, 
                 "nr = ",cor.test(CXCL9_average$mean, CXCL9_average$CD14_Macrophages2, method = "pearson")$estimate))
ggsave2("Fig4E_CD14_Macrophages_2_CXCL9.pdf", path = "../results", width = 10, height = 9, units = "cm")

就感觉原代码可视化出来的这几个图好丑,应该是要改参数,但是我们的重点不是分析可以进行吗,我就没改,如果友友觉得需要可视化的很漂亮,可以自己改参数哦

总结

下面是做的TCGA的分析,但是我做的是植物的,因此我就没做后面的结果。

因此对上面的一系列的流程结果进行总结,可以发现是首先流程性的分析,然后开始对注释后的亚群进行了精细的分析,将不同的细胞类群叶放到脚本里面进行分析。其中有很多的比较好用的函数,比如绘制热图、计算相关性的函数,还有一些比较好用的R包,可以用来表格的数据的整理,因此需要整理一下里面的基本的函数,以及这些的基础,加强后期对R语言基础的学习。

0 人点赞