单细胞韧皮部研究代码解析2--comparison_denyer2019.R

2023-04-08 18:16:23 浏览数 (1)

单细胞韧皮部研究代码解析1-QC_filtering.R:https://cloud.tencent.com/developer/article/2256814?areaSource=&traceId=

今天继续给大家分享这篇作者的代码,在很多人做单细胞数据分析的时候,,目前是伴随单细胞组学的发展,如何将前人发表的单细胞转录组数据与获得的单细胞数据进行整合,这篇文章的作者提供了一个思路。

代码来源:https://github.com/tavareshugo/publication_Otero2022_PhloemPoleAtlas/blob/master/analysis/02-comparison_denyer2019.R

代码解析

代码语言:javascript复制
###data及R包的读入
###作者这里选用的是scater进行后续的分析,这里需要自己提前安装,可以用conda先构建一个scater的环境,后续lib.loc进行调用
library(data.table)
library(scater)
library(scran)
library(ggplot2)
library(patchwork)

# set seed for reproducible results
set.seed(1001)

# set ggplot2 theme
theme_set(theme_bw()   theme(text = element_text(size = 14)))

# source util functions
#这里还是作者在github上分享的自己的R代码,其中有getReducedDim函数,需要提前下载下来
source("analysis/functions/utils.R")

# Marker genes whose promoters were used for cell sorting
markers <- data.table(name = c("APL", "MAKR5", "PEARdel", "S17", "sAPL"),
                      id = c("AT1G79430", "AT5G52870", "AT2G37590", "AT2G22850", "AT3G12730"))


# Read data ---------------------------------------------------------------

# ring data both soft and hard filtered
ring_soft <- readRDS("data/processed/SingleCellExperiment/ring_batches_softfilt.rds")
ring_hard <- readRDS("data/processed/SingleCellExperiment/ring_batches_hardfilt.rds")

# all batches, both soft and hard filtered
all_soft <- readRDS("data/processed/SingleCellExperiment/all_batches_softfilt.rds")
all_soft$dataset <- ifelse(grepl("denyer", all_soft$Sample), "Denyer et al 2019", "ring")

all_hard <- readRDS("data/processed/SingleCellExperiment/all_batches_hardfilt.rds")
all_hard$dataset <- ifelse(grepl("denyer", all_hard$Sample), "Denyer et al 2019", "ring")



# Comparison with Denyer - hard filter ------------------------------------

# plot things together
##作者前面已经确定了UMAP-MNN_30是最符合自己数据集的降维方式,所以这里自己调用的时候需要更改
##这里是根据ggthemes这个包的不同颜色进行填充对上面读入的all_hard data 进行可视化:scale_colour_tableau
p1 <- ggplot(getReducedDim(all_hard, "UMAP-MNN_30"), aes(V1, V2, group = cluster_mnn))  
  geom_point(aes(colour = cluster_mnn), show.legend = FALSE)  
  geom_label(stat = "centroid", aes(label = cluster_mnn), size = 3,
             label.padding = unit(0.1, "lines"))  
  ggthemes::scale_colour_tableau(palette = "Tableau 20")  
  labs(x = "UMAP 1", y = "UMAP 2")

p2 <- ggplot(getReducedDim(all_hard, "UMAP-MNN_30"), aes(V1, V2))  
  geom_point(aes(colour = dataset))  
  labs(x = "UMAP 1", y = "UMAP 2")  
  scale_colour_brewer(palette = "Dark2")

p3 <- ggplot(getReducedDim(all_hard, "UMAP-MNN_30"), aes(V1, V2))  
  ggpointdensity::geom_pointdensity(show.legend = FALSE)  
  labs(x = "UMAP 1", y = "UMAP 2")  
  scale_colour_viridis_c(option = "magma")  
  facet_grid(~ dataset)

(p1 | p2) / p3   plot_annotation(tag_levels = "A")   plot_layout(guides = "collect")


# plot genes mentioned in Denyer2019 Fig 2D
temp <- getReducedDim(all_hard, "UMAP-MNN_30", genes = c("AT3G58190", "AT1G79430", "AT5G47450"),
              melted = TRUE)
ggplot(temp[order(expr, na.last = FALSE)], aes(V1, V2))  
  geom_point(aes(colour = expr))  
  scale_colour_viridis_c()  
  facet_grid(dataset ~ id)  
  labs(x = "UMAP 1", y = "UMAP 2")


# Comparison with Denyer - soft filter -------------------------------------
***
# plot things together
# cluster_mnn:这里是作者的分群数据
# dataset:拟整合的Denyer 的数据集
p1 <- ggplot(getReducedDim(all_soft, "UMAP-MNN_30"), aes(V1, V2, group = cluster_mnn))  
  geom_point(aes(colour = cluster_mnn), show.legend = FALSE)  
  geom_label(stat = "centroid", aes(label = cluster_mnn), size = 3,
             label.padding = unit(0.1, "lines"))  
  ggthemes::scale_colour_tableau(palette = "Tableau 20")  
  labs(x = "UMAP 1", y = "UMAP 2")

p2 <- ggplot(getReducedDim(all_soft, "UMAP-MNN_30"), aes(V1, V2))  
  geom_point(aes(colour = dataset))  
  labs(x = "UMAP 1", y = "UMAP 2")  
  scale_colour_brewer(palette = "Dark2")

p3 <- ggplot(getReducedDim(all_soft, "UMAP-MNN_30"), aes(V1, V2))  
  ggpointdensity::geom_pointdensity(show.legend = FALSE)  
  labs(x = "UMAP 1", y = "UMAP 2")  
  scale_colour_viridis_c(option = "magma")  
  facet_grid(~ dataset)

(p1 | p2) / p3   plot_annotation(tag_levels = "A")   plot_layout(guides = "collect")
***

# plot genes mentioned in Denyer2019 Fig 2D
# 选取部分已知的marker 基因进行可视化
temp <- getReducedDim(all_soft, "UMAP-MNN_30", genes = c("AT3G58190", "AT1G79430", "AT5G47450"),
                      melted = TRUE)
temp$expr <- ifelse(temp$expr == 0, NA, temp$expr)
ggplot(temp[order(expr, na.last = FALSE)], aes(V1, V2))  
  geom_point(aes(colour = expr))  
  scale_colour_viridis_c()  
  facet_grid(dataset ~ id)  
  labs(x = "UMAP 1", y = "UMAP 2")


# Contaminating layers ----------------------------------------------------
# plot genes identifying collumela and epidermis
# 选用已知的中柱和表皮的marker 基因,对all_hard和all_soft数据集进行可视化

# hard filtered data
temp <- getReducedDim(all_hard, "UMAP-MNN_30",
                      genes = c("AT1G79580", "AT3G29810", "AT5G58580", "AT5G14750"),
                      melted = TRUE)
temp$expr <- ifelse(temp$expr == 0, NA, temp$expr)
ggplot(temp[order(expr, na.last = FALSE)], aes(V1, V2))  
  geom_point(aes(colour = expr))  
  scale_colour_viridis_c()  
  facet_grid(dataset ~ id)  
  labs(x = "UMAP 1", y = "UMAP 2")


# soft filtered data
temp <- getReducedDim(all_soft, "UMAP-MNN_30",
                      genes = c("AT1G79580", "AT3G29810", "AT5G58580", "AT5G14750"),
                      melted = TRUE)
temp$expr <- ifelse(temp$expr == 0, NA, temp$expr)
ggplot(temp[order(expr, na.last = FALSE)], aes(V1, V2))  
  geom_point(aes(colour = expr))  
  scale_colour_viridis_c()  
  facet_grid(dataset ~ id)  
  labs(x = "UMAP 1", y = "UMAP 2")


# Collumela
temp <- getReducedDim(all_hard, "UMAP-MNN_30", genes = c("AT1G79580", "AT1G13620", "AT2G04025"),
                      melted = TRUE)
ggplot(temp[order(expr, na.last = FALSE)], aes(V1, V2))  
  geom_point(aes(colour = expr))  
  scale_colour_viridis_c()  
  facet_grid(dataset ~ id)  
  labs(x = "UMAP 1", y = "UMAP 2")

# Epidermis - actually AT2G46410 is also expressed in "stele"
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC150583/
temp <- getReducedDim(all_hard, "UMAP-MNN_30", genes = c("AT5G14750", "AT1G79840", "AT2G46410"),
                      melted = TRUE)
ggplot(temp[order(expr, na.last = FALSE)], aes(V1, V2))  
  geom_point(aes(colour = expr))  
  scale_colour_viridis_c()  
  facet_grid(dataset ~ id)  
  labs(x = "UMAP 1", y = "UMAP 2")

# Genes used for sorting
# 前期用于分选的marker基因进行可视化
temp <- getReducedDim(all_hard, "UMAP-MNN_30",
                      genes = unique(markers$id), melted = TRUE)
temp <- merge(temp, markers, by = "id")
ggplot(temp[order(expr, na.last = FALSE)], aes(V1, V2))  
  geom_point(aes(colour = expr))  
  scale_colour_viridis_c()  
  facet_grid(dataset ~ name)  
  labs(x = "UMAP 1", y = "UMAP 2")

总结

通过前面的代码解析,发现是作者是提前把denyer的数据进行了整合,主要的可视化的代码是*** ***里面的内容。首先时作者读入了soft和hard 的data,把自己以前进行分选的marker基因及已知的marker基因进行整合数据集的可视化,去表明整合后的数据集都能定位到相似的位置,验证自己的数据集的可靠性。

0 人点赞