单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析7

2022-08-29 11:45:49 浏览数 (3)

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析1:https://cloud.tencent.com/developer/article/2055573

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析2:https://cloud.tencent.com/developer/article/2072069

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析3:https://cloud.tencent.com/developer/article/2078159

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析4:https://cloud.tencent.com/developer/article/2078348

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析5:https://cloud.tencent.com/developer/article/2084580

单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析6:https://cloud.tencent.com/developer/article/2085385

图片.png图片.png

代码解析

主要是为了对bed数据进行分析,将atac的数据与rna的数据进行整合。

代码语言:javascript复制
###########################################################
# Matt Regner
# Franco Lab
# Date: May-December 2020
# 
# Sample: All 
# Description: This script performs the following tasks  
#         1) scATAC-seq processing
#         2) scRNA-seq/scATAC-seq integration
#         3) Peak calling
###########################################################
.libPaths('/home/regnerm/anaconda3/envs/scENDO_scOVAR/lib/R/library')


source("./filterDoublets_modified.R")
###############################################################
library(scater)
library(dplyr)
library(Seurat)
library(patchwork)
library(SingleCellExperiment)
library(scater)
library(ComplexHeatmap)
## ConsensusClusterPlus:一步到位的一致性聚类
library(ConsensusClusterPlus)
##msigdbr:基因集数据库
library(msigdbr)
##fgsea单细胞转录组GSEA富集分析
library(fgsea)
library(dplyr)
library(tibble)
library(Signac)
library(ggplot2)
library(stringr)
library(EnsDb.Hsapiens.v86)
library(Seurat)
library(Signac)
library(ggplot2)
library(ensembldb)
library(EnsDb.Hsapiens.v86)
library(ArchR)
library(SingleR)
#viridis:绘图R包
library(viridis)
set.seed(123)
addArchRThreads(threads = 24) 
addArchRGenome("hg38")

# Set up directories and file variables:
##################################################################################################
SAMPLE.ID <- "All"
output.dir <- "."
####################
setwd(output.dir)
####################
inputFiles <- list.files(pattern = "\.gz$")
##给予样本来源
sampleNames <- c("3533EL","3571DL","36186L","36639L","366C5L","37EACL","38FE7L","3BAE2L","3CCF1L","3E4D1L","3E5CFL")
##读取前面所call出来的bed文件
encode.all <- read.delim("./GRCh38-ccREs.bed",header =F)
colnames(encode.all)[1:3] <- c("seqnames","start","end")
# Store patient metadata and colors:
# Make patient sample metadata and color assignments 

##RColorBrewer::brewer.pal:调色板
sampleColors <- RColorBrewer::brewer.pal(11,"Paired")
sampleColors[11] <- "#8c8b8b"
pie(rep(1,11), col=sampleColors) 

# Color patient tumors to resemble the cancer ribbon color 
sampleColors <- c(sampleColors[5],sampleColors[7],sampleColors[6],sampleColors[8],sampleColors[10],sampleColors[9],sampleColors[4],sampleColors[3],sampleColors[2],sampleColors[11],sampleColors[1])
sampleAnnot <- data.frame(Sample = c("3533EL","3571DL","36186L","36639L",
                                     "366C5L","37EACL","38FE7L","3BAE2L","3CCF1L","3E4D1L","3E5CFL"),
                          Color = sampleColors,
                          Cancer = c("endometrial","endometrial","endometrial","endometrial","endometrial","endometrial",
                                     "ovarian","ovarian","ovarian","ovarian","ovarian"),
                          Histology = c("endometrioid","endometrioid","endometrioid","endometrioid","endometrioid",
                                        "serous","endometrioid","serous","carcinosarcoma","GIST","serous"),
                          BMI = c(39.89,30.5,38.55,55.29,49.44,29.94,34.8,22.13,23.72,33.96,22.37),
                          Age = c(70,70,70,49,62,74,76,61,69,59,59),
                          Race = c("AA","CAU","CAU","CAU","CAU","CAU","CAU","CAU","CAU","CAU","AS"),
                          Stage = c("IA","IA","IA","IA","IA","IIIA","IA","IIB","IVB","IV","IIIC"),
                          Site = c("Endometrium","Endometrium","Endometrium","Endometrium","Endometrium","Ovary","Ovary","Ovary","Ovary","Ovary","Ovary"),
                          Type = c("Endometrial","Endometrial","Endometrial","Endometrial","Endometrial","Endometrial","Ovarian","Ovarian","Ovarian","Gastric","Ovarian"))

# # Read in matching scRNA-seq
# ###################################################################################################
rna <- readRDS("./endo_ovar_All_scRNA_processed.rds")
rna$cell.type <- str_replace(rna$cell.type,"/","_")
Idents(rna) <- "cell.type"

# Redo differential expression with new cell type markers 
Wilcox.markers <- readRDS("./wilcox_DEGs.rds")
Wilcox.markers$cluster <- str_replace(Wilcox.markers$cluster,"/","_")

# Create Arrow and ArchR project
##########################################################################
ArrowFiles <- createArrowFiles(
 inputFiles = inputFiles,
 sampleNames = sampleNames,
 filterTSS = 0, #Dont set this too high because you can always increase later
 filterFrags = 0,
 addTileMat = T,
 addGeneScoreMat = F
)
ArrowFiles <- list.files(pattern=".arrow")
##arrow移除doublets
doubScores <- addDoubletScores(
  input = ArrowFiles,
  k = 10, #Refers to how many cells near a "pseudo-doublet" to count.
  knnMethod = "UMAP",useMatrix = "TileMatrix",nTrials=5,LSIMethod = 1,scaleDims = F,
  corCutOff = 0.75,UMAPParams = list(n_neighbors =30, min_dist = 0.3, metric = "cosine", verbose =FALSE),
  dimsToUse = 1:50
)
##创建ArchRProject工作环境
proj <- ArchRProject(
  ArrowFiles = ArrowFiles,
  outputDirectory = "All",
  copyArrows = T #This is recommened so that you maintain an unaltered copy for later usage.
)

# Filter out outlier low quality cells and doublets
###############################################################################
# GMM for fragments per cell
##使用mclust进行聚类分析
library(mclust)
for (i in sampleNames){
  proj.i <- proj[proj$Sample == i]

  # GMM for fragments per cell
  depth.clust <- Mclust(log10(proj.i$nFrags),G = 2)
  proj.i$depth.cluster <- depth.clust$classification
  proj.i$depth.cluster.uncertainty <- depth.clust$uncertainty

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = as.character(proj.i$depth.cluster),
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )   ggtitle(paste0("GMM classification:n",i," log10(fragments)")) 
    ggsave(paste0(i,"_depth.pdf"),width = 4,height = 4)

  # GMM for TSS per cell
  TSS.clust <- Mclust(log10(proj.i$TSSEnrichment 1),G = 2)
  proj.i$TSS.cluster <- TSS.clust$classification
  proj.i$TSS.cluster.uncertainty <- TSS.clust$uncertainty

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = as.character(proj.i$TSS.cluster),
    discrete = T,
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )   ggtitle(paste0("GMM classification:n",i," TSS Enrichment")) 
    ggsave(paste0(i,"_TSS.pdf"),width = 4,height = 4)


  df.TSS <- data.frame(proj.i$cellNames,proj.i$TSS.cluster,proj.i$TSS.cluster.uncertainty,proj.i$TSSEnrichment)
  df.TSS <- dplyr::filter(df.TSS,proj.i.TSS.cluster == "2")
  df.TSS <- dplyr::filter(df.TSS,proj.i.TSS.cluster.uncertainty <= 0.05)
  saveRDS(df.TSS,paste0("df_TSS_",i,".rds"))

  df.depth <- data.frame(proj.i$cellNames,proj.i$depth.cluster,proj.i$depth.cluster.uncertainty,proj.i$nFrags)
  df.depth <- dplyr::filter(df.depth,proj.i.depth.cluster == "2")
  df.depth <- dplyr::filter(df.depth,proj.i.depth.cluster.uncertainty <= 0.05)
  saveRDS(df.depth,paste0("df_depth_",i,".rds"))

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    colorDensity = T,
    continuousSet = "sambaNight",
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )  geom_hline(yintercept = log10(min(df.TSS$proj.i.TSSEnrichment) 1),linetype = "dashed") 
    geom_vline(xintercept = min(log10(df.depth$proj.i.nFrags)),linetype = "dashed") 
    ggtitle(paste0("QC thresholds:n",i)) 
    ggsave(paste0(i,"_QC.pdf"),width = 4,height = 4)

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = proj.i$DoubletEnrichment,
    discrete = F,
    continuousSet = "sambaNight",
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )  geom_hline(yintercept = min(log10(df.TSS$proj.i.TSSEnrichment 1)),linetype = "dashed") 
    geom_vline(xintercept = min(log10(df.depth$proj.i.nFrags)),linetype = "dashed") 
    ggtitle(paste0("Doublet Enrichment:n",i)) 
    ggsave(paste0(i,"_doublets.pdf"),width = 4,height = 4)

}

for (i in sampleNames[2]){
  proj.i <- proj[proj$Sample == i]

  # GMM for fragments per cell
  depth.clust <- Mclust(log10(proj.i$nFrags),G = 2)
  proj.i$depth.cluster <- depth.clust$classification
  proj.i$depth.cluster.uncertainty <- depth.clust$uncertainty
  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = as.character(proj.i$depth.cluster),
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )   ggtitle(paste0("GMM classification:n",i," log10(fragments)")) 
    ggsave(paste0(i,"_depth.pdf"),width = 4,height = 4)

  # Manually set TSS threshold
  #TSS.clust <- Mclust(log10(proj.i$TSSEnrichment 1),G = 2)
  proj.i$TSS.cluster <- ifelse(log10(proj.i$TSSEnrichment 1) >= 0.80,"2","1")
  proj.i$TSS.cluster.uncertainty <- rep(NA,nrow(proj.i@cellColData))
  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = as.character(proj.i$TSS.cluster),
    discrete = T,
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )   ggtitle(paste0("GMM classification:n",i," TSS Enrichment")) 
    ggsave(paste0(i,"_TSS.pdf"),width = 4,height = 4)
    
  df.TSS <- data.frame(proj.i$cellNames,proj.i$TSS.cluster,proj.i$TSS.cluster.uncertainty,proj.i$TSSEnrichment)
  df.TSS <- dplyr::filter(df.TSS,proj.i.TSS.cluster == "2")
  #df.TSS <- dplyr::filter(df.TSS,proj.i.TSS.cluster.uncertainty <= 0.05)
  saveRDS(df.TSS,paste0("df_TSS_",i,".rds"))

  df.depth <- data.frame(proj.i$cellNames,proj.i$depth.cluster,proj.i$depth.cluster.uncertainty,proj.i$nFrags)
  df.depth <- dplyr::filter(df.depth,proj.i.depth.cluster == "2")
  df.depth <- dplyr::filter(df.depth,proj.i.depth.cluster.uncertainty <= 0.05)
  saveRDS(df.depth,paste0("df_depth_",i,".rds"))

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    colorDensity = T,
    continuousSet = "sambaNight",
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )  geom_hline(yintercept = log10(min(df.TSS$proj.i.TSSEnrichment) 1),linetype = "dashed") 
    geom_vline(xintercept = min(log10(df.depth$proj.i.nFrags)),linetype = "dashed") 
    ggtitle(paste0("QC thresholds:n",i)) 
    ggsave(paste0(i,"_QC.pdf"),width = 4,height = 4)

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = proj.i$DoubletEnrichment,
    discrete = F,
    continuousSet = "sambaNight",
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )  geom_hline(yintercept = min(log10(df.TSS$proj.i.TSSEnrichment 1)),linetype = "dashed") 
    geom_vline(xintercept = min(log10(df.depth$proj.i.nFrags)),linetype = "dashed") 
    ggtitle(paste0("Doublet Enrichment:n",i)) 
    ggsave(paste0(i,"_doublets.pdf"),width = 4,height = 4)

}

for (i in sampleNames[7]){
  proj.i <- proj[proj$Sample == i]

  # GMM for fragments per cell
  depth.clust <- Mclust(log10(proj.i$nFrags),G = 2)
  proj.i$depth.cluster <- depth.clust$classification
  proj.i$depth.cluster.uncertainty <- depth.clust$uncertainty

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = as.character(proj.i$depth.cluster),
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )   ggtitle(paste0("GMM classification:n",i," log10(fragments)")) 
    ggsave(paste0(i,"_depth.pdf"),width = 4,height = 4)

  # Manually set TSS threshold
  #TSS.clust <- Mclust(log10(proj.i$TSSEnrichment 1),G = 2)
  proj.i$TSS.cluster <- ifelse(log10(proj.i$TSSEnrichment 1) >= 0.80,"2","1")
  proj.i$TSS.cluster.uncertainty <- rep(NA,nrow(proj.i@cellColData))

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = as.character(proj.i$TSS.cluster),
    discrete = T,
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )   ggtitle(paste0("GMM classification:n",i," TSS Enrichment")) 
    ggsave(paste0(i,"_TSS.pdf"),width = 4,height = 4)

  df.TSS <- data.frame(proj.i$cellNames,proj.i$TSS.cluster,proj.i$TSS.cluster.uncertainty,proj.i$TSSEnrichment)
  df.TSS <- dplyr::filter(df.TSS,proj.i.TSS.cluster == "2")
  #df.TSS <- dplyr::filter(df.TSS,proj.i.TSS.cluster.uncertainty <= 0.05)
  saveRDS(df.TSS,paste0("df_TSS_",i,".rds"))

  df.depth <- data.frame(proj.i$cellNames,proj.i$depth.cluster,proj.i$depth.cluster.uncertainty,proj.i$nFrags)
  df.depth <- dplyr::filter(df.depth,proj.i.depth.cluster == "2")
  df.depth <- dplyr::filter(df.depth,proj.i.depth.cluster.uncertainty <= 0.05)
  saveRDS(df.depth,paste0("df_depth_",i,".rds"))

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    colorDensity = T,
    continuousSet = "sambaNight",
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )  geom_hline(yintercept = log10(min(df.TSS$proj.i.TSSEnrichment) 1),linetype = "dashed") 
    geom_vline(xintercept = min(log10(df.depth$proj.i.nFrags)),linetype = "dashed") 
    ggtitle(paste0("QC thresholds:n",i)) 
    ggsave(paste0(i,"_QC.pdf"),width = 4,height = 4)

  ggPoint(
    x = log10(proj.i$nFrags),
    y = log10(proj.i$TSSEnrichment 1),
    color = proj.i$DoubletEnrichment,
    discrete = F,
    continuousSet = "sambaNight",
    xlabel = "log10(unique fragments)",
    ylabel = "log10(TSS Enrichment 1)"
  )  geom_hline(yintercept = min(log10(df.TSS$proj.i.TSSEnrichment 1)),linetype = "dashed") 
    geom_vline(xintercept = min(log10(df.depth$proj.i.nFrags)),linetype = "dashed") 
    ggtitle(paste0("Doublet Enrichment:n",i)) 
    ggsave(paste0(i,"_doublets.pdf"),width = 4,height = 4)

}
###############################################################################
dev.off()

# Filter out low quality cells, and remove doublets
##############################################################################

list.depth <- list.files(pattern = "^df_depth")

df.depth <-  data.frame(cellNames=character(),
                        cluster=character(),
                        cluster.uncertainty=character(),
                        nFrags = character())
for (i in list.depth){
  df <- readRDS(i)
  colnames(df) <- c("cellNames","cluster","cluster.uncertainty","nFrags")
  df.depth <- rbind(df.depth,df)
}

list.TSS <- list.files(pattern = "^df_TSS")

df.TSS <-  data.frame(cellNames=character(),
                      cluster=character(),
                      cluster.uncertainty=character(),
                      TSSEnrichment = character())
for (i in list.TSS){
  df <- readRDS(i)
  colnames(df) <- c("cellNames","cluster","cluster.uncertainty","TSSEnrichment")
  df.TSS <- rbind(df.TSS,df)
}

colnames(df.TSS) <- c("cellNames","TSS.cluster","TSS.cluster.uncertainty","TSSEnrichment")
colnames(df.depth) <- c("cellNames","depth.cluster","depth.cluster.uncertainty","nFrags")

cellsPass <- intersect(df.TSS$cellNames,df.depth$cellNames)
cellsFail <-  proj$cellNames[!(proj$cellNames %in% cellsPass)]

# Screen for high quality barcodes (remove non cellular barcodes)
proj.filter <- proj[proj$cellNames %in% cellsPass]
proj <- filterDoublets(proj.filter,filterRatio = 1,cutEnrich = 1,cutScore = -Inf)
plotFragmentSizes(proj) ggtitle("Fragment Size Histogram") ggsave("Frags_hist.pdf",width = 6,height = 4)
plotTSSEnrichment(proj) ggtitle("TSS Enrichment") ggsave("TSS.pdf",width = 6,height = 4)
###############################################################################################################

# Perform LSI reduction and clustering with ATAC data only
#######################################################################

# Add LSI dimreduc
proj <- addIterativeLSI(
  ArchRProj = proj,
  useMatrix = "TileMatrix",
  name = "IterativeLSI",
  iterations = 4,
  LSIMethod = 2,
  scaleDims = T,
  clusterParams = list( #See Seurat::FindClusters
    resolution = c(0.2),
    sampleCells = 10000,
    n.start = 10
  ),
  UMAPParams = list(n_neighbors =30,
                    min_dist = 0.3,
                    metric = "cosine",
                    verbose =FALSE),
  varFeatures = 25000,
  dimsToUse = 1:50,
  binarize = T,
  corCutOff = 0.75,
  force = T,
  seed=6
)

proj <- addClusters(
  input = proj,
  reducedDims = "IterativeLSI",
  method = "Seurat",
  name = "ATAC_clusters",
  resolution = 0.7,
  dimsToUse = 1:50,force = T
)
# Add UMAP based on LSI dims
proj <- addUMAP(proj,nNeighbors = 30,minDist = 0.3,dimsToUse = 1:50,metric = "cosine",force = T,reducedDims="IterativeLSI")
saveRDS(proj,"proj_LSI_AND_UMAP.rds")

###################################################################################################

# Estimate gene activity in ATAC data and perform cell type annotation:
# Add Gene activity matrix using ArchR model
proj <- addGeneScoreMatrix(proj,matrixName = "ArchRGeneScore",force = T)
getAvailableMatrices(proj)
saveRDS(proj,"proj_LSI_AND_GeneScores.rds")

#  Constrained Integration to only align cells from the same patient tumor
groupList <- SimpleList()
for (i in levels(factor(proj$Sample))){
  rna.sub <- rna[,rna$Sample == i]
  RNA.cells <- colnames(rna.sub)
  
  idxSample <- BiocGenerics::which(proj$Sample == i)
  cellsSample <- proj$cellNames[idxSample]
  proj.filter <- proj[cellsSample, ]
  ATAC.cells <- proj.filter$cellNames
  
  groupList[[i]] <- SimpleList(
    ATAC = ATAC.cells,
    RNA = RNA.cells
  )

} 
  
# ###########################################################################################
##addGeneIntegrationMatrix()对scATAC-seq和scRNA-seq 数据进行整合
proj <- addGeneIntegrationMatrix(
  ArchRProj = proj,
  useMatrix = "ArchRGeneScore",
  matrixName = "GeneIntegrationMatrix_ArchR",
  reducedDims = "IterativeLSI",
  seRNA = rna,
  groupList = groupList,
  addToArrow = T,
  force= TRUE,
  groupRNA = "cell.type",
  nameCell = "predictedCell_ArchR",
  nameGroup = "predictedGroup_ArchR",
  nameScore = "predictedScore_ArchR",
  plotUMAP = F,
  useImputation = F,
  transferParams = list(dims = 1:50)
)
getAvailableMatrices(proj)
saveRDS(proj,"proj_LSI_GeneScores_Annotations_Int.rds") 

############################################################################################
# Begin Downstream Analysis
##这里的内容比较值得看一下,前面是文件内容的替换,重点看后面的画图
#      1)  Plotting RNA/ATAC by sample, by cluster, by predicted label
#      2)  Marker Gene (RNA/ATAC) intersection
#      3)  Peak2GeneLinks/Coaccessiblity
######################################################

# PART 1: Plotting
######################################################################################

# Make embedding highlighting by 1) Predicted group ArchR 2) Predicted group Signac
# 3) Sample 4) ATAC-only clusters
atac.archr <- plotEmbedding(proj,colorBy = "cellColData",name = "predictedGroup_ArchR")
atac.archr.emb <- as.data.frame(atac.archr$data)
atac.archr.emb$cell.type.archr <- atac.archr.emb$color
atac.archr.emb$cell.type.archr <- sub("-", ":", atac.archr.emb$cell.type.archr)
atac.archr.emb$cell.type.archr <- gsub(".*:", "", atac.archr.emb$cell.type.archr)
head(atac.archr.emb)
atac.archr.emb$cell.type.archr <- factor(atac.archr.emb$cell.type.archr, levels = levels(as.factor(rna$cell.type)))
head(atac.archr.emb)


atac <- plotEmbedding(proj,colorBy = "cellColData",name = "Sample")
atac.emb.sample <- as.data.frame(atac$data)
atac.emb.sample$sample <- atac.emb.sample$color
atac.emb.sample$sample <- sub("-", ":", atac.emb.sample$sample )
atac.emb.sample$sample  <- gsub(".*:", "", atac.emb.sample$sample )
head(atac.emb.sample)
head(atac.emb.sample)


atac <- plotEmbedding(proj,colorBy = "cellColData",name = "ATAC_clusters")
atac.emb.cluster <- as.data.frame(atac$data)
atac.emb.cluster$sample <- atac.emb.cluster$color
atac.emb.cluster$sample <- sub("-", ":", atac.emb.cluster$sample )
atac.emb.cluster$sample  <- gsub(".*:", "", atac.emb.cluster$sample )
head(atac.emb.cluster)

atac.emb.all <- cbind(atac.archr.emb[,c(1:2,4)],
                      atac.emb.sample[,4],
                      atac.emb.cluster[,4])

atac.emb.all$plain <- "Plain"

colnames(atac.emb.all) <- c("UMAP1","UMAP2","Predicted.Group.ArchR",
                            "Sample","ATAC_clusters","Blank")

head(atac.emb.all)


ggplot(atac.emb.all,aes_string(x = "UMAP1",y="UMAP2",color = "Predicted.Group.ArchR")) 
  geom_point(size = .1) 
  theme_classic() 
  ggtitle("scATAC-seq: Predicted Group ArchR") 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2") 
  theme(legend.key.size = unit(0.2, "cm")) 
  guides(colour = guide_legend(override.aes = list(size=3))) 
  ggsave(paste0("PredictedGroup_ArchR_ATAC.pdf"),width = 8,height = 6)

ggplot(atac.emb.all,aes_string(x = "UMAP1",y="UMAP2",color = "Sample")) 
  geom_point(size = .1) 
  theme_classic() 
  ggtitle(paste0("scATAC-seq: Sample")) 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2") 
  theme(legend.key.size = unit(0.2, "cm")) 
  scale_color_manual(values = sampleColors)
  #guides(colour = guide_legend(override.aes = list(size=3))) 
  ggsave(paste0("Sample_ATAC.pdf"),width = 8,height = 6)

prediction.scores <- data.frame(ArchR= proj$predictedScore_ArchR)
var.list <- colnames(prediction.scores)

for (i in 1:length(var.list)){

  ggplot(prediction.scores,aes_string(x = var.list[i])) 
    geom_histogram(binwidth = 0.025,fill="#000000", color="#e9ecef", alpha=0.9) 
    theme_classic() 
    ggsave(paste0(var.list[i],"_ATAC.pdf"),width = 8,height = 6)

}

# Plot matching scRNA-seq plots:
#####################################################################################
rna.emb <- as.data.frame(rna@reductions$umap@cell.embeddings)
rna.emb$cell.type <- as.factor(rna$cell.type)
rna.emb$sample <- rna$Sample

rna.emb$cell.type <- factor(rna.emb$cell.type,levels = levels(atac.emb.all$Predicted.Group.ArchR))
rna.cell.plot <- ggplot(rna.emb,aes(x = UMAP_1,y=UMAP_2,color = cell.type)) 
  geom_point(size = .1) 
  theme_classic() 
  ggtitle("scRNAseq") 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2") 
  theme(legend.key.size = unit(0.2, "cm")) 
  guides(colour = guide_legend(override.aes = list(size=3)))
rna.cell.plot  ggsave("RNA_All_labels.pdf",width = 8,height = 6)

rna.emb$sample <- factor(rna.emb$sample,levels = levels(factor(atac.emb.all$Sample)))
rna.sample.plot <-ggplot(rna.emb,aes(x = UMAP_1,y=UMAP_2,color = sample)) 
  geom_point(size = .1) 
  theme_classic() 
  ggtitle("scRNAseq") 
  theme(plot.title = element_text(face = "bold")) 
  xlab("UMAP_1") 
  ylab("UMAP_2") 
  theme(legend.key.size = unit(0.2, "cm")) 
  scale_color_manual(values = sampleColors) 
  guides(colour = guide_legend(override.aes = list(size=3))) ggsave("RNA_All_sample.pdf")
rna.sample.plot  ggsave("RNA_All_samples.pdf",width = 8,height = 6)

#########################################################################################################
# # PART 2: Marker gene ovarlap RNA/ATAC
#
#
# # Differential pseudo-gene activity analysis:
# ############################
#
######################################################################################
#ArchR
########################################################################################
# DEGs using ATAC labels
markersGS.archr <- getMarkerFeatures(
  ArchRProj = proj,
  useMatrix = "ArchRGeneScore",
  groupBy = "ATAC_clusters",
  bias = c("TSSEnrichment", "log10(nFrags)"),
  testMethod = "wilcoxon"
)

heatmapGS.archr <- markerHeatmap(
  seMarker = markersGS.archr,
  cutOff = "FDR <= 0.01 & Log2FC >= 1.25",
  labelMarkers =NULL,
  transpose = F,
  pal =  viridis(n=256),
  limits = c(-2,2)
)
ComplexHeatmap::draw(heatmapGS.archr, heatmap_legend_side = "bot", annotation_legend_side = "bot")
plotPDF(heatmapGS.archr , name = "GeneScores-Marker-Heatmap_ArchR", width = 8, height = 6, ArchRProj = proj, addDOC = FALSE)

# DEGs using predicted labels (removing small groups)
idxSample <- BiocGenerics::which(proj$predictedScore_ArchR > 0.5)
cellsSample <- proj$cellNames[idxSample]
proj.filter <- proj[cellsSample, ]

popular.groups <- summary(factor(proj.filter$predictedGroup_ArchR))
popular.groups <- popular.groups[popular.groups > 10]
proj.filter$Mode.Label <- ifelse(proj.filter$predictedGroup_ArchR %in% names(popular.groups),TRUE,FALSE)

idxSample <- BiocGenerics::which(proj.filter$Mode.Label == TRUE)
cellsSample <- proj.filter$cellNames[idxSample]
proj.filter <- proj.filter[cellsSample, ]

# DEGs using predicted labels
##ArchR还使用相同的 getMarkerFeatures () 函数支持标准差异测试
markersGS.archr.pred <- getMarkerFeatures(
  ArchRProj = proj.filter,
  useMatrix = "ArchRGeneScore",
  groupBy = "predictedGroup_ArchR",
  bias = c("TSSEnrichment", "log10(nFrags)"),
  testMethod = "wilcoxon"
)
heatmapGS.archr.pred <- markerHeatmap(
  seMarker = markersGS.archr.pred,
  cutOff = "FDR <= 0.01 & Log2FC >= 1.25",
  labelMarkers =NULL,
  transpose = F,
  pal =  viridis(n=256),
  limits = c(-2,2)
)
ComplexHeatmap::draw(heatmapGS.archr.pred, heatmap_legend_side = "bot", annotation_legend_side = "bot")
plotPDF(heatmapGS.archr.pred, name = "GeneScores-Marker-Heatmap_ArchR_pred", width = 8, height = 6, ArchRProj = proj.filter, addDOC = FALSE)

# Differential peak analysis:
############################
# ATAC clusters
proj <- addGroupCoverages(ArchRProj = proj,groupBy = "ATAC_clusters",force = T)

pathToMacs2 <- findMacs2()

proj <- addReproduciblePeakSet(
  ArchRProj = proj,
  groupBy = "ATAC_clusters",
  pathToMacs2 = pathToMacs2,force = T
)
proj <- addPeakMatrix(proj,force = T)
proj <- addBgdPeaks(proj)

markersPeaks <- getMarkerFeatures(
  ArchRProj = proj,
  useMatrix = "PeakMatrix",
  groupBy = "ATAC_clusters",
  bias = c("TSSEnrichment", "log10(nFrags)"),
  testMethod = "wilcoxon"
)

heatmapPeaks<- markerHeatmap(
  seMarker = markersPeaks,
  cutOff = "FDR <= 0.01 & Log2FC >= 1.25",
  labelMarkers =NULL,
  transpose = F,
  pal =  viridis(n=256),
  limits = c(-2,2)
)
ComplexHeatmap::draw(heatmapPeaks, heatmap_legend_side = "bot", annotation_legend_side = "bot")
plotPDF(heatmapPeaks, name = "Markers_peaks_ATAC_clusters", width = 8, height = 6, ArchRProj = proj, addDOC = FALSE)

# ArchR predicted labels
# DEGs using predicted labels (removing small groups)
idxSample <- BiocGenerics::which(proj$predictedScore_ArchR >= 0.5)
cellsSample <- proj$cellNames[idxSample]
proj.filter <- proj[cellsSample, ]

popular.groups <- summary(factor(proj.filter$predictedGroup_ArchR))
popular.groups <- popular.groups[popular.groups > 10]
proj.filter$Mode.Label <- ifelse(proj.filter$predictedGroup_ArchR %in% names(popular.groups),TRUE,FALSE)

idxSample <- BiocGenerics::which(proj.filter$Mode.Label == TRUE)
cellsSample <- proj.filter$cellNames[idxSample]
proj.filter <- proj.filter[cellsSample, ]

proj.archr <- addGroupCoverages(ArchRProj = proj.filter,groupBy = "predictedGroup_ArchR",force = T)
pathToMacs2 <- findMacs2()
proj.archr <- addReproduciblePeakSet(
  ArchRProj = proj.archr,
  groupBy = "predictedGroup_ArchR",
  pathToMacs2 = pathToMacs2,force = T
)
proj.archr <- addPeakMatrix(proj.archr,force = T)
proj.archr <- addBgdPeaks(proj.archr,force = T)
markersPeaks.archr <- getMarkerFeatures(
  ArchRProj = proj.archr,
  useMatrix = "PeakMatrix",
  groupBy = "predictedGroup_ArchR",
  bias = c("TSSEnrichment", "log10(nFrags)"),
  testMethod = "wilcoxon"
)

heatmapPeaks.archr<- markerHeatmap(
  seMarker = markersPeaks.archr,
  cutOff = "FDR <= 0.01 & Log2FC >= 1.25",
  labelMarkers =NULL,
  transpose = F,
  pal =  viridis(n=256),
  limits = c(-2,2)
)
ComplexHeatmap::draw(heatmapPeaks.archr, heatmap_legend_side = "bot", annotation_legend_side = "bot")
plotPDF(heatmapPeaks.archr , name = "Markers_peaks_Archr_Predicted_labels", width = 8, height = 6, ArchRProj =  proj.archr, addDOC = FALSE)

saveRDS(proj.archr,"./final_archr_proj_archrGS.rds")
# # RNA heatmap
####################################################################################
topN <-Wilcox.markers %>% group_by(cluster) %>% dplyr::filter(p_val_adj <= 0.01) %>% top_n(30, desc(avg_logFC))

# Downsample cells from each cluster
rna.sub <- subset(rna,downsample =300)
rna.sub <- NormalizeData(rna.sub)
rna.sub <- rna.sub[rownames(rna.sub) %in% topN$gene,]
rna.sub <- ScaleData(rna.sub,features = rownames(rna.sub))
mat <- rna.sub@assays$RNA@scale.data
cluster_anno<- rna.sub@meta.data$cell.type
col_fun = circlize::colorRamp2(c(-2, 0, 2),viridis(n = 3))

heatmapRNA <- Heatmap(mat, name = "Expression",
        column_split = factor(cluster_anno),
        cluster_columns =T,
        show_column_dend = F,
        cluster_column_slices = T,
        column_title_gp = gpar(fontsize = 8),
        column_gap = unit(0.1, "mm"),
        cluster_rows = T,
        show_row_dend = FALSE,
        col = col_fun,
        column_title_rot = 90,
        show_column_names = F)
plotPDF(heatmapRNA, name = "Heatmap_RNA", width = 8, height = 6)

##############
# END OF SCRIPT
##############

总结

看完了全部的代码后,发现还是跟上面的教程一样的问题,代码很冗余,看着看着就不知道作者干嘛了,有可能中间很很多的测试数据的内容,但是不是文章正式的图表,其中主要的几个热图的R包还有配色包、以及做医学的几个富集包可以看一下。

0 人点赞