单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析8

2022-08-30 12:33:16 浏览数 (3)

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析1:https://cloud.tencent.com/developer/article/2055573

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析2:https://cloud.tencent.com/developer/article/2072069

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析3:https://cloud.tencent.com/developer/article/2078159

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析4:https://cloud.tencent.com/developer/article/2078348

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析5:https://cloud.tencent.com/developer/article/2084580

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析6:https://cloud.tencent.com/developer/article/2085385

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析7:https://cloud.tencent.com/developer/article/2085705

这节是作者提供的fig1和附表fig1的图形可视化的代码。

fig1.pngfig1.png
figs1figs1

代码解析

代码语言:javascript复制
###################################################
# Matt Regner
# Franco Lab 
###################################################
library(ggplot2)
library(Seurat)
##标准化函数scale()
library(scales)
##因子处理 forcats
library(forcats)
library(ArchR)
library(stringr)
library(stringi)
library(RColorBrewer)
library(dplyr)
# Make patient sample metadata and color assignments 
##给每个样本 选取颜色
sampleColors <- RColorBrewer::brewer.pal(11,"Paired")
sampleColors[11] <- "#8c8b8b"
pie(rep(1,11), col=sampleColors) 

# Color patient tumors to resemble the cancer ribbon color 
sampleColors <- c(sampleColors[5],sampleColors[7],sampleColors[6],sampleColors[8],sampleColors[10],sampleColors[9],sampleColors[4],sampleColors[3],sampleColors[2],sampleColors[11],sampleColors[1])
##给每个样本下定义,给与名字,及细胞的类型, 各种不同样本的信息指数
sampleAnnot <- data.frame(Sample = c("3533EL","3571DL","36186L","36639L",
                                     "366C5L","37EACL","38FE7L","3BAE2L","3CCF1L","3E4D1L","3E5CFL"),
                          Color = sampleColors,
                          Cancer = c("endometrial","endometrial","endometrial","endometrial","endometrial","endometrial",
                                     "ovarian","ovarian","ovarian","ovarian","ovarian"),
                          Histology = c("endometrioid","endometrioid","endometrioid","endometrioid","endometrioid",
                                        "serous","endometrioid","serous","carcinosarcoma","GIST","serous"),
                          BMI = c(39.89,30.5,38.55,55.29,49.44,29.94,34.8,22.13,23.72,33.96,22.37),
                          Age = c(70,70,70,49,62,74,76,61,69,59,59),
                          Race = c("AA","CAU","CAU","CAU","CAU","CAU","CAU","CAU","CAU","CAU","AS"),
                          Stage = c("IA","IA","IA","IA","IA","IIIA","IA","IIB","IVB","IV","IIIC"),
                          Site = c("Endometrium","Endometrium","Endometrium","Endometrium","Endometrium","Ovary","Ovary","Ovary","Ovary","Ovary","Ovary"),
                          Type = c("Endometrial","Endometrial","Endometrial","Endometrial","Endometrial","Endometrial","Ovarian","Ovarian","Ovarian","Gastric","Ovarian"))

# Read in RNA data for full cohort
##  读取上述的保存文件
rna <- readRDS("endo_ovar_All_scRNA_processed.rds")
# Read in matching ATAC data for full cohort (ArchR project after label transfer)
atac <- readRDS("proj_LSI_GeneScores_Annotations_Int.rds")

# Plot Patient UMAPs for RNA/ATAC
# RNA UMAP first
rna.df <- as.data.frame(rna@reductions$umap@cell.embeddings)
length(which(rownames(rna.df)==rownames(rna@meta.data)))
rna.df$Sample <- rna$Sample
rna.sample.plot <-ggplot(rna.df,aes(x = UMAP_1,y=UMAP_2,color = Sample)) 
  geom_point(size = .1) 
  theme_classic() 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2")  
  theme(legend.key.size = unit(0.2, "cm")) 
  scale_color_manual(values = sampleColors) 
  guides(colour = guide_legend(override.aes = list(size=6)))
rna.sample.plot  ggsave("Sample_RNA.pdf",width = 8,height = 7)

# ATAC UMAP second:
##plotEmbedding:archr嵌入数据
atac.df <- plotEmbedding(atac,colorBy = "cellColData",name = "Sample",embedding = "UMAP")
atac.df <- as.data.frame(atac.df$data)
atac.df$Sample <- gsub(".*-","",atac.df$color)

ggplot(atac.df,aes_string(x = "x",y="y",color = "Sample")) 
  geom_point(size = .1) 
  theme_classic() 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2") 
  theme(legend.key.size = unit(0.2, "cm")) 
  scale_color_manual(values = sampleColors) 
  guides(colour = guide_legend(override.aes = list(size=6))) 
ggsave(paste0("Sample_ATAC.pdf"),width = 8,height = 7)

# Plot cell type UMAPs for RNA/ATAC
# RNA
##与上面的代码基本一致,主要的不同是选取的是降维后的分组后,来对降维后的细胞类型进行可视化
##这里比较建议用多个snn的参数进行调试,作者这里面只是保留了0.7,可以参照肺癌的那个文章的代码,写的很好,可以拿来直接用
levels(factor(rna$RNA_snn_res.0.7))
rna.df$cluster <- rna$RNA_snn_res.0.7
rna.df$cell.type <- rna$cell.type
#Manually annotate 23-cluster as smooth muscle
#加入手动注释的结果
rna.df$cell.type <- str_replace_all(rna.df$cell.type,"23-Stromal fibroblast","23-Smooth muscle cells")
##因子转换
rna.df$cluster <- as.factor(rna.df$cluster)
rna.df$cell.type <- as.factor(rna.df$cell.type)
levels(rna.df$cluster)
levels(rna.df$cell.type)

# Help sort the cluster numbers:
###############################
##进行不同细胞类型的提取
epi <- grep("pitheli",levels(rna.df$cell.type))
epi.new <- grep("-Ciliated",levels(rna.df$cell.type))
epi <- c(epi,epi.new)
fibro <- grep("ibro",levels(rna.df$cell.type))
smooth <- grep("mooth",levels(rna.df$cell.type))
endo <- grep("dothel",levels(rna.df$cell.type))
t.nk <- grep("T cell",levels(rna.df$cell.type))
t.nk.new <- grep("Lym",levels(rna.df$cell.type))
t.nk <- c(t.nk,t.nk.new)
mac <- grep("acrophage",levels(rna.df$cell.type))
mast <- grep("Mast",levels(rna.df$cell.type))
b <- grep("B cell",levels(rna.df$cell.type))

##将上述的细胞类型进行矩阵的合并
cell.types.idx <- c(epi,fibro,smooth,endo,t.nk,mac,mast,b)
##开始进行字符替换
store <- numeric(0)
for(i in 1:length(cell.types.idx)){
  name <- levels(rna.df$cell.type)[cell.types.idx[i]]
  print(gsub("-.*","",name))
  new.name <- gsub("-.*","",name)
  new.num <- as.numeric(new.name)
  store[i] <- new.num
}
print(store)
#####################################################

my_levels <- c(11,20,21,22,31,
               19,34,
               3,
               9,10,
               16,17,
               0,27,
               6,8,12,14,15,18,24,25,26,29,
               7,23,
               1,33,
               2,4,30,
               5,13,
               32,
               28,35)

# Relevel object@ident
rna.df$cluster.new <- factor(x = rna.df$cluster, levels = my_levels)

##自定义渐变色板 colorRampPalette() 
epithelial.cols <- colorRampPalette(c("#A0E989", "#245719"))
epithelial.cols <- epithelial.cols(14)
fibro.cols <-colorRampPalette(c("#FABFD2", "#B1339E"))
fibro.cols <- fibro.cols(10)
smooth.cols <- c("#b47fe5","#d8b7e8")
endo.cols <- c("#93CEFF","#4A99FF")
t.cols <- c("gray60","gray20","gray40")
macro.cols <- c("#ff6600","#ff9d5c")
mast.cols <- "gold3"
b.cols <- c("#B22222","#CD5C5C")
cols <- c(epithelial.cols,fibro.cols,smooth.cols,endo.cols,t.cols,macro.cols,mast.cols,b.cols)

p1 <- ggplot(rna.df,aes(x = UMAP_1,y=UMAP_2,color = cluster.new)) 
  geom_point(size = .1) 
  theme_classic() 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2")  
  theme(legend.key.size = unit(0.2, "cm")) 
  scale_color_manual(values = cols) 
  guides(colour = guide_legend(override.aes = list(size=6))) ggsave("Cell_Type_RNA.pdf",width = 8,height = 7)

p1 <- ggplot(rna.df,aes(x = UMAP_1,y=UMAP_2,color = cluster.new)) 
  geom_point(size = .1) 
  theme_classic() 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2")  
  theme(legend.key.size = unit(0.2, "cm")) 
  scale_color_manual(values = cols) 
  guides(colour = guide_legend(override.aes = list(size=6))) NoLegend()
LabelClusters(p1,id="cluster.new",color="black",repel = T,size=8) ggsave("Cell_Type_RNA-labels.pdf",width = 8,height = 7)


# ATAC
levels(factor(atac$predictedGroup_ArchR))

atac.df <- plotEmbedding(atac,colorBy = "cellColData",name = "predictedGroup_ArchR",embedding = "UMAP")
atac.df <- as.data.frame(atac.df$data)
atac.df$cell.type <- sub(".*?-","",atac.df$color)
atac.df$cell.type <- as.factor(atac.df$cell.type)
atac.df$cell.type <- str_replace_all(atac.df$cell.type,"23-Stromal fibroblast","23-Smooth muscle cells")
atac.df$cluster <-  gsub("-.*","",atac.df$cell.type)

my_levels <- c(11,20,21,22,31,
               19,34,
               3,
               9,10,
               16,17,
               0,27,
               6,8,12,14,15,18,24,25,26,29,
               7,23,
               1,33,
               2,4,30,
               5,13,
               32,
               28,35)

# Relevel object@ident
atac.df$cluster.new <- factor(x = atac.df$cluster, levels = my_levels)
epithelial.cols <- colorRampPalette(c("#A0E989", "#245719"))
epithelial.cols <- epithelial.cols(14)
fibro.cols <-colorRampPalette(c("#FABFD2", "#B1339E"))
fibro.cols <- fibro.cols(10)
smooth.cols <- c("#b47fe5","#d8b7e8")
endo.cols <- c("#93CEFF","#4A99FF")
t.cols <- c("gray60","gray20","gray40")
macro.cols <- c("#ff6600","#ff9d5c")
mast.cols <- "gold3"
b.cols <- c("#B22222","#CD5C5C")

cols <- c(epithelial.cols,fibro.cols,smooth.cols,endo.cols,t.cols,macro.cols,mast.cols,b.cols)

p1 <- ggplot(atac.df,aes(x =x,y=y,color = cluster.new)) 
  geom_point(size = .1) 
  theme_classic() 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2")  
  theme(legend.key.size = unit(0.2, "cm")) 
  scale_color_manual(values = cols) 
  guides(colour = guide_legend(override.aes = list(size=6))) ggsave("Cell_Type_ATAC.pdf",width = 8,height = 7)
p1 <- ggplot(atac.df,aes(x = x,y=y,color = cluster.new)) 
  geom_point(size = .1) 
  theme_classic() 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2")  
  theme(legend.key.size = unit(0.2, "cm")) 
  scale_color_manual(values = cols) 
  guides(colour = guide_legend(override.aes = list(size=6))) NoLegend()
LabelClusters(p1,id="cluster.new",color="black",repel = T,size=8) ggsave("Cell_Type_ATAC-labels.pdf",width = 8,height = 7)
##上面的两部分应该是fig1的B和C图,如果自己在做可视化的时候,调整一下颜色的内容,还有样本的数据集命名,可视化的代码应该也是可以直接拿来用的

#########################################################################################################
##FIG1的D图,样本所占的百分比的堆积图
# Patient proportion per subcluster in RNA:
meta <- rna@meta.data
df <- meta %>% group_by(RNA_snn_res.0.7) %>% count(Sample)
colnames(df) <- c("Cluster","Sample","Cells")
# Reorder cluster factor levels to group by cell type 
levels(factor(rna$cell.type))
df %>% 
  dplyr::mutate(cell.type = factor(Cluster,levels = c(11,20,21,22,31,
                                                      19,34,
                                                      3,
                                                      9,10,
                                                      16,17,
                                                      0,27,
                                                      6,8,12,14,15,18,24,25,26,29,
                                                      7,23,
                                                      1,33,
                                                      2,4,30,
                                                      5,13,
                                                      32,
                                                      28,35))) %>% 
  ggplot(aes(fill=Sample, y=Cells, x= fct_rev(cell.type)))   
  geom_bar(position="fill", stat="identity") 
  coord_flip() theme_classic() xlab("Clusters") ylab("# of cells") 
  scale_fill_manual(values = sampleColors) ggsave("Cell_Type_Prop_RNA.pdf",width = 4,height = 8)

# Patient proportion per subcluster in ATAC:
meta <- as.data.frame(atac@cellColData)
meta$predictedGroup_ArchR <- gsub("-.*", "", meta$predictedGroup_ArchR)

df <- meta %>% group_by(predictedGroup_ArchR) %>% count(Sample)
colnames(df) <- c("Cluster","Sample","Cells")

# Reorder cluster factor levels to group by cell type 
levels(factor(atac$predictedGroup_ArchR))
df %>% 
  dplyr::mutate(cell.type = factor(Cluster,levels = c(11,20,21,22,31,
                                                      19,34,
                                                      3,
                                                      9,10,
                                                      16,17,
                                                      0,27,
                                                      6,8,12,14,15,18,24,25,26,29,
                                                      7,23,
                                                      1,33,
                                                      2,4,30,
                                                      5,13,
                                                      32,
                                                      28,35))) %>% 
  ggplot(aes(fill=Sample, y=Cells, x= fct_rev(cell.type)))   
  geom_bar(position="fill", stat="identity") 
  coord_flip() theme_classic() xlab("Clusters") ylab("# of cells") 
  scale_fill_manual(values = sampleColors) ggsave("Cell_Type_Prop_ATAC.pdf",width = 4,height = 8)
##这里的可视化的方式与肺癌的文章也很像,只是颜色的布局有些不同

# Write cluster total # of cells to output files
total.atac <- as.data.frame(table(atac$predictedGroup_ArchR))
colnames(total.atac) <- c("Cluster","ATAC cells")

total.rna <- as.data.frame(table(rna$cell.type))
colnames(total.rna) <- c("Cluster","RNA cells")

total <- merge(total.rna,total.atac,by = "Cluster")
WriteXLS::WriteXLS(total,"./Total_Cell_Numbers_per_Cluster.xlsx")


# Suppl. Figure CNV plot
# Plot CNV box: 
meta <- rna@meta.data
meta$cluster <- factor(meta$RNA_snn_res.0.7,levels = rev(c(11,20,21,22,31,
                                                           19,34,
                                                           3,
                                                           9,10,
                                                           16,17,
                                                           0,27,
                                                           6,8,12,14,15,18,24,25,26,29,
                                                           7,23,
                                                           1,33,
                                                           2,4,30,
                                                           5,13,
                                                           32,
                                                           28,35)))

ggplot(meta,aes(x=cluster,y=Total_CNVs,fill=cluster)) geom_boxplot() coord_flip() 
  theme_classic() scale_fill_manual(values = rev(cols)) NoLegend() 
  ggsave("CNV_BoxPlot.pdf",width = 4,height = 8)

writeLines(capture.output(sessionInfo()), "sessionInfo.txt")

总结

以上的代码可以分为两个部分,作者分别对rna及atac的数据进行可视化,先是对每个样品的来源进行umap可视化,随后加入了人工注释的结果进行可视化,去比较不一样的分组的结果,这部分可以加入不同的处理,比如正常组及癌症的组,只是在代码中,选用了不同的分组来源,这部分的内容在肺癌的文章也有相关的代码,也是可以直接拿来用的额。

第二个部分是分群的堆积图,也是很多单细胞的文章中用到的,也是比较经典的图,这两篇文章中所用的代码是类似的,只是所用选用的数据集不一致的,因此友友在进行学习的时候,可以摘取自己觉得可以百搭的代码进行个人整理,便于后续可视化的时候不用翻代码了。

0 人点赞