单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析1:https://cloud.tencent.com/developer/article/2055573
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析2:https://cloud.tencent.com/developer/article/2072069
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析3:https://cloud.tencent.com/developer/article/2078159
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析4:https://cloud.tencent.com/developer/article/2078348
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析5:https://cloud.tencent.com/developer/article/2084580
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析6:https://cloud.tencent.com/developer/article/2085385
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析7:https://cloud.tencent.com/developer/article/2085705
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析8:https://cloud.tencent.com/developer/article/2086805
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析9:https://cloud.tencent.com/developer/article/2087563
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析10:https://cloud.tencent.com/developer/article/2090290
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析11:https://cloud.tencent.com/developer/article/2093123
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析12:https://cloud.tencent.com/developer/article/2093208
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析13:https://cloud.tencent.com/developer/article/2093567
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析14: https://cloud.tencent.com/developer/article/2094034
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析15:https://cloud.tencent.com/developer/article/2102671
单细胞代码解析-妇科癌症单细胞转录组及染色质可及性分析16:https://cloud.tencent.com/developer/article/2103030
这部分是作者为了寻找转录因子footpring进行的分析,主要是对相关的方法进行了改进
Part I: set intersection of distal regulatory elements with differentially accessible peaks
代码语言:javascript复制library(ArchR)
library(ChIPpeakAnno)
library(stats)
ArchR::addArchRThreads(threads = 32)
source("./Archr_Peak_Null_Permute.R")
source("./Archr_Peak_RawPval.R")
source("./getMatrixFromProject.R")
h5disableFileLocking()
SAMPLE.ID <- "All"
key.word.1 <- "pithelia"
key.word.2 <- "-Ciliated"
proj <- readRDS("./final_archr_proj_archrGS.rds")
p2g.df.obs <- readRDS("./All_P2G_Observed.rds")
#Subset to postive correlation P2Gs
p2g.df.sub <- dplyr::filter(p2g.df.obs,RawPVal <=1e-12 & Correlation >= 0.45& peakType == "Distal")
# Marker Peaks for malignant clusters with 100% patient specificity
markersPeaks <- getMarkerFeatures(
ArchRProj = proj,
useMatrix = "PeakMatrix",
groupBy = "predictedGroup_ArchR",
bias = c("TSSEnrichment", "log10(nFrags)"),
testMethod = "wilcoxon"
)
markerList <- getMarkers(markersPeaks, cutOff = "FDR <= 0.01 & Log2FC >= 1.25")
saveRDS(markerList,"markerspeaks.rds")
print(names(markerList))
idx.1 <- grep(key.word.1,names(markerList))
idx.2 <- grep(key.word.2,names(markerList))
idx.3 <- grep("16-Fibroblast",names(markerList))
idx.4 <- grep("0-Fibroblast",names(markerList))
idx.5 <- grep("27-Fibroblast",names(markerList))
idx <- c(idx.1,idx.2,idx.3,idx.4,idx.5)
markerList.sub <- markerList[idx]
print(names(markerList.sub))
# Only tumor cell clusters that are 100% patient specific
df <- as.data.frame(proj@cellColData) %>% dplyr::group_by(predictedGroup_ArchR) %>%
dplyr::count(Sample)
idents <- table(df$predictedGroup_ArchR)
idents.sub <- idents[idents ==1 & names(idents) %in% names(markerList.sub)]
markerList.sub <- markerList.sub[names(idents.sub)]
print(names(markerList.sub))
# Intersect marker peaks for each cluster with P2Gs and write to bed file
for ( i in names(markerList.sub)){
data <- unlist(markerList.sub[i])
markers <- paste0(data$seqnames,":",data$start,"-",data$end)
markers.int <- intersect(p2g.df.sub$peakName,markers)
markers.int <- stringr::str_replace(string=markers.int,pattern = ":",replacement = "-")
bed <- data.frame(do.call(rbind, strsplit(markers.int, "-", fixed=TRUE)))
write.table(bed,paste0("Marker_Enhancers_ArchR_",i,".bed"),row.names = F,col.names = F,quote = F,sep = "t")
}
# Concatenate marker peaks from each cluster and remove redundant peaks
data <- unlist(markerList.sub)
markers <- paste0(data$seqnames,":",data$start,"-",data$end)
markers.int <- intersect(p2g.df.sub$peakName,markers)
markers.int <- unique(markers.int)
markers.int <- stringr::str_replace(string=markers.int,pattern = ":",replacement = "-")
bed <- data.frame(do.call(rbind, strsplit(markers.int, "-", fixed=TRUE)))
write.table(bed,"Marker_Enhancers_ArchR-nodups.bed",row.names = F,col.names = F,quote = F,sep = "t")
writeLines(capture.output(sessionInfo()), "sessionInfo_Intersect_DiffPeaks_P2Gs.txt")
Part II: Generating the input matrices for the TFSEE computation
代码语言:javascript复制#!/bin/bash
#SBATCH --job-name Reformat
#SBATCH --cpus-per-task 1
#SBATCH -c 1
#SBATCH --mem 2g
#SBATCH --partition allnodes
# Remove whitespace from file names
for file in *; do mv "$file" `echo $file | tr ' ' '_'` ; done
代码语言:javascript复制library(ArchR)
source("./Archr_Peak_Null_Permute.R")
source("./Archr_Peak_RawPval.R")
source("./getMatrixFromProject.R")
library(stringr)
library(utils)
library(dplyr)
ArchR::addArchRThreads(threads = 32)
h5disableFileLocking()
# ONlY USE 100% patient-specfic
labels <- list.files(pattern = "Marker_Enhancers_ArchR")
labels <- str_remove(labels,"Marker_Enhancers_ArchR_")
labels <- str_remove(labels,".bed")
labels <- labels[-12]# Remove extra
print(labels)
enhancers <- read.delim("Marker_Enhancers_ArchR-nodups.bed",header = F)
enhancers <- paste0(enhancers$V1,":",enhancers$V2,"-",enhancers$V3)
# Pseudobulk ATAC enhancer matrix
proj.archr <- readRDS("./final_archr_proj_archrGS-P2Gs.rds")
peak.mat <- getMatrixFromProject.mod(proj.archr,useMatrix = "PeakMatrix")
cell.names <- colnames(assay(peak.mat))
peak.names <- paste0(seqnames(rowRanges(peak.mat)),":", start(ranges(rowRanges(peak.mat))),"-", end(ranges(rowRanges(peak.mat))))
peak.mat <- assay(peak.mat)
dim(peak.mat)
length(cell.names)
length(peak.names)
colnames(peak.mat) <- cell.names
rownames(peak.mat) <- peak.names
head(peak.mat[,1:4])
peaks.pseudobulk <- data.frame(rownames(peak.mat))
# Run twice to hit both whitespaces
proj.archr$predictedGroup_ArchR <- str_replace(proj.archr$predictedGroup_ArchR," ","_")
proj.archr$predictedGroup_ArchR <- str_replace(proj.archr$predictedGroup_ArchR," ","_")
for (i in labels){
cells <- rownames(dplyr::filter(as.data.frame(proj.archr@cellColData),predictedGroup_ArchR == i))
peak.mat.sub <- peak.mat[,colnames(peak.mat) %in% cells]
peaks.bulk <- Matrix::rowSums(peak.mat.sub)
peaks.pseudobulk$i <- peaks.bulk
colnames(peaks.pseudobulk)[dim(peaks.pseudobulk)[2]] <- i
print("Iteration complete")
}
rownames(peaks.pseudobulk) <- peaks.pseudobulk[,1]
peaks.pseudobulk <- peaks.pseudobulk[,-1]
print(labels)
print(colnames(peaks.pseudobulk))
dim(peaks.pseudobulk)
head(peaks.pseudobulk[,1:6])
peaks.pseudobulk <- peaks.pseudobulk[rownames(peaks.pseudobulk) %in% unique(enhancers),]
dim(peaks.pseudobulk)
# Remove enhancer peaks that have ANY zero counts in ANY sample (no replicates)
peaks.pseudobulk[peaks.pseudobulk == 0] <- NA
peaks.pseudobulk <- peaks.pseudobulk[complete.cases(peaks.pseudobulk),]
dim(peaks.pseudobulk)
head(peaks.pseudobulk[,1:6])
# Subset original Marker enhancer lists to new updated nonzero enhancers
for ( i in 1:length(labels)){
file <- paste0("Marker_Enhancers_ArchR_",labels[i],".bed")
bed <- read.delim(file,header = F)
rownames(bed) <- paste0(bed$V1,":",bed$V2,"-",bed$V3)
bed.new <- bed[rownames(bed) %in% rownames(peaks.pseudobulk),]
write.table(bed.new,paste0("Marker_Enhancers_ArchR_",labels[i],"-updated.bed"),row.names = F,col.names = F,quote = F,sep = "t")
}
writeLines(capture.output(sessionInfo()), "sessionInfo_TFSEE_Step1_NonZero_Enhancers.txt")