作者,Evil Genius
上一篇我们完成了空间注释之后,立马就要来到我们的下一步分析,细胞的空间位置关系。也是空间转录组必备的分析之一。
其中包括不同细胞类型之间的空间关系
以及相同细胞类型的空间位置关系
课程上分享的代码均可实现,但是我们不走回头路,分享就分享新的。
首先定义分析空间细胞类型共定位的函数
代码语言:javascript
复制###篇幅限制,无法展示
大家还知道不同平台共定位需要考虑的邻居数么?
主脚本
代码语言:javascript
复制library(stringr)
library(cocor)
library(dplyr)
library(data.table)
library(stats)
library(scalop)
library(tibble)
library(ggrepel)
library(tidyr)
library(scales)
library(matrixStats)
# Run functions first
# co-localization ---------------------------------------------------------
#### log-transform expression matrices
m <- readRDS("test.rds")
m <- m[-grep("^MT-|^RPL|^RPS", rownames(m)), ]
if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]}
scaling_factor <- 1000000/colSums(m)
m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*")
m_loged <- log2(1 (m_CPM/10))
# removing genes with zero variance across all cells
var_filter <- apply(m_loged, 1, var)
m_proc <- m_loged[var_filter != 0, ]
# filtering out lowly expressed genes
exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)]
m_proc <- m_proc[exp_genes, ]
# output to a list
per_sample_mat[[i]] <- m_proc
names(per_sample_mat)[i] <- sample_ls[[i]]
rm(path, files, m, m_CPM, m_loged, var_filter, exp_genes, m_proc)
#### co-localiztion
results<-list()
for(i in seq_along(names(all_decon))) {
decon <- all_decon[[sample_ls[i]]]
spot_list <- lapply(c(1:500),function(x){ #num. of shuffled matrices
all_row <- sapply(c(1:dim(decon)[2]),function(c){
new_row <- sample(c(1:length(row.names(decon))), length(row.names(decon)), replace = FALSE)
return(new_row)
})
new_mat <- sapply(c(1:dim(decon)[2]),function(c){
sh_row <- decon[,c][all_row[,c]]#generate many deconv matrices by shuffling cell types
return(sh_row)
})
colnames(new_mat) <- colnames(decon)
return(new_mat)
})
#run colocal_mat_fun -- perform colocalization on the shuffled decon matrices
mats_list<- lapply(spot_list, function(new_decon){
colocal_mats<-colocal_mat(new_decon)
return(colocal_mats)
})
mean_mat <- sapply(mats_list,function(x){ #expected colocalization calculated from shuffled decon matrices
return(x[,"ab_comb"])
})
pairs_shuf_mean <- apply(mean_mat,1,mean)
names(pairs_shuf_mean) <- mats_list[[1]][,"pairs"]
rownames(mean_mat) <- mats_list[[1]][,"pairs"]
obs_coloc <- colocal_mat(decon)
r1 = obs_coloc$ab_comb #observed colocal
r2 = pairs_shuf_mean #expected colocal
n = dim(decon)[1] #num total spots
fisher = ((0.5*log((1 r1)/(1-r1)))-(0.5*log((1 r2)/(1-r2))))/((1/(n-3)) (1/(n-3)))^0.5
p.value = (2*(1-pnorm(abs(as.matrix(fisher)))))
effect_size <- r1/r2 #observed/expected
results[[i]]<-data.frame(pvalue = p.value,
effect_size = effect_size)
}
names(results) <- sample_ls
results2<-lapply(results, dplyr::add_rownames, 'pairs')
results2<- lapply(results2,function(x){
as.matrix(x)
})
results2 <- lapply(results2, function(x){rownames(x) <- x[,1]; x})
df <- do.call("rbind", results2)
###volcano plot (all pairs, all samples)###
df->results_comb2
results_comb2<-as.data.frame(results_comb2)
results_comb2$pvalue<-as.numeric(results_comb2$pvalue)
results_comb2$effect_size<-as.numeric(results_comb2$effect_size)
results_comb2$sig <- "NO"
results_comb2$sig[results_comb2$effect_size >= 1.3 & results_comb2$pvalue <= 0.01] <- "enriched"
results_comb2$sig[results_comb2$effect_size <= 0.7 & results_comb2$pvalue <= 0.01] <- "depleted"
results_comb2$label <- NA
results_comb2$label[results_comb2$sig != "NO"] <- results_comb2$pairs[results_comb2$sig != "NO"]
ggplot(data=results_comb2, aes(x=effect_size, y=-log10(pvalue), col=sig, label=label))
geom_point()
theme_minimal()
geom_text_repel()
theme(text = element_text(size = 20))
代码语言:javascript
复制### Adjacency --------------------------------------------------------------
mp_assign <- readRDS("test.rds", sep = "")
all_zones <- readRDS("final_zones.rds")
extend_metadata <- tibble()
generate_metadata <- sapply(c(1:length(sample_ls)), function(i){
print(sample_ls[i])
# load data
spots_positions <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
row.names(spots_positions) <- spots_positions$V1
spots_clusters <- readRDS("test.rds", sep = "")
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes
metadata <- tibble(Key = paste(sample_ls[i],spots_clusters$barcodes, sep="_"),
SpotID = spots_clusters$barcodes,
Sample = rep(sample_ls[i], nrow(spots_clusters)),
MPid = spots_clusters$spot_type,
array_row = spots_positions[spots_clusters$barcodes,"V3"],
array_col = spots_positions[spots_clusters$barcodes, "V4"],
pxl_in_rows = spots_positions[spots_clusters$barcodes, "V5"],
pxl_in_cols = spots_positions[spots_clusters$barcodes, "V6"],
Zone = as.character(all_zones[[i]][spots_clusters$barcodes]))
extend_metadata <<- rbind.data.frame(extend_metadata, metadata)
})
set.seed(50)
neighbs_stats <- neighbor_spot_props(metadata = extend_metadata,
zone = "All",
#site = "All",
samples = "All",
#zone_by = "EpiStroma",
n_cores = 30,
plot_perm_distr = TRUE,
n_perm = 10000,
filter_signif = TRUE,
zscore_thresh = 1)
# regional comp (Previously run by server. Possible to run per sample below)----------------------------------------------------------
#!/usr/bin/env Rscript
#args = commandArgs(trailingOnly = TRUE)
#sname <- as.character(args[1])
#sname <- str_replace(sname, "r", "")
#print(paste("I got the samp right", sname))
sname <- "" #insert sample name
gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
mp_assign <- readRDS('test.rds')
return(unique(mp_assign$spot_type_meta_new))
}))))
max_win_size <- 15
pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
# load data
spots_positions <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
row.names(spots_positions) <- spots_positions$V1
spots_clusters <- readRDS('test.rds')
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes
neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)
all_spots <- spots_clusters$barcodes
all_pval_windows <- sapply(c(1:max_win_size), function(win_size){
proximity <- t(sapply(all_spots, function(spot){
win_spots <- c(spot)
sapply(c(1:win_size), function(i){
win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
})
win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]))
return(win_abund)
}))
old_clusters <- colnames(proximity)
add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
if (length(add_clusters) != 0){
sapply(add_clusters,function(clust){
proximity <<- cbind(proximity,rep(0,dim(proximity)[1]))
})
colnames(proximity)[c((length(old_clusters) 1):dim(proximity)[2])] <- add_clusters
}
proximity <- proximity[,sort(colnames(proximity))]
is_one <- apply(proximity,1,function(ro){
return(1 %in% ro )
})
proximity <- proximity[!is_one,]
all_cor <- sapply(c(1:dim(pairs)[2]), function(j){
pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]])
return(pair_cor)
})
all_cor <- data.frame(pair_cors = all_cor)
row.names(all_cor) <- pairs_names
return(all_cor)
})
sample_proximity <- as.data.frame(all_pval_windows)
row.names(sample_proximity) <- pairs_names
# regional comp random (Previously run by server. Possible to run per sample below) ---------------------------------------------------
#!/usr/bin/env Rscript
#args = commandArgs(trailingOnly = TRUE)
#sname <- as.character(args[1])
#sname <- str_replace(sname, "r", "")
sname <- "" #insert sample name
gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
mp_assign <- readRDS('test.rds')
return(unique(mp_assign$spot_type_meta_new))
}))))
max_win_size <- 15
pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
rand_num <- 500
# load data
spots_positions_orign <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
row.names(spots_positions_orign) <- spots_positions_orign$V1
spots_clusters <- readRDS("test.rds", sep = "")
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes
all_rand <- lapply(c(1:rand_num),function(j){
new_pos_all <- sample(spots_positions_orign$V1[spots_positions_orign$V2 != 0], length(spots_positions_orign$V1[spots_positions_orign$V2 != 0]), replace = FALSE)
spots_positions <- spots_positions_orign
spots_positions$V1[spots_positions$V2 != 0] <- new_pos_all
neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)
all_spots <- spots_clusters$barcodes
all_pval_windows <- sapply(c(1:max_win_size), function(win_size){
proximity <- t(sapply(all_spots, function(spot){
win_spots <- c(spot)
sapply(c(1:win_size), function(i){
win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
})
win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]))
return(win_abund)
}))
old_clusters <- colnames(proximity)
add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
if (length(add_clusters) != 0){
sapply(add_clusters,function(clust){
proximity <<- cbind(proximity,rep(0,dim(proximity)[1]))
})
colnames(proximity)[c((length(old_clusters) 1):dim(proximity)[2])] <- add_clusters
}
proximity <- proximity[,sort(colnames(proximity))]
is_one <- apply(proximity,1,function(ro){
return(1 %in% ro )
})
proximity <- proximity[!is_one,]
all_cor <- sapply(c(1:dim(pairs)[2]), function(j){
pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]])
return(pair_cor)
})
all_cor <- data.frame(pair_cors = all_cor)
row.names(all_cor) <- pairs_names
return(all_cor)
})
sample_proximity <- as.data.frame(all_pval_windows)
row.names(sample_proximity) <- pairs_names
return(sample_proximity)
})
sample_mean_rand_prox <- Reduce(" ", all_rand) / length(all_rand)
sample_sd_rand_prox <- round(apply(array(unlist(all_rand), c(length(pairs_names), max_win_size, rand_num)), c(1,2), sd),4)
# regional comp downstream ------------------------------------------------
gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
mp_assign <- readRDS('test.rds')
return(unique(mp_assign$spot_type_meta_new))
}))))
pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
all_zones <- readRDS("Spatial_coh_zones/spatial_zonesv3.rds")
all_proximity_list <- list.files("Spatial_coh_zones/proximity_samples/")
all_proximity <- lapply(all_proximity_list, function(prox){
samp_prox <- readRDS('test.rds')
return(samp_prox)
})
names(all_proximity) <- sapply(str_split(all_proximity_list, "_"), function(x){return(x[1])})
all_proximity_rand_list <- list.files("Spatial_coh_zones/proximity_rand_samples/")
all_proximity_rand <- lapply(all_proximity_rand_list, function(prox){
samp_prox <- readRDS('test.rds')
return(samp_prox)
})
names(all_proximity_rand) <- sapply(str_split(all_proximity_rand_list, "_"), function(x){return(x[1])})
combined_proximity <- t(sapply(c(1:length(pairs_names)), function(i){
pair_prox <- sapply(all_proximity, function(x){
pair_sample_df <- as.data.frame(x[i,c(1:15)])
colnames(pair_sample_df) <- c(as.character(c(1:15)))
return(pair_sample_df)
})
return(apply(pair_prox,1,function(x){mean(na.omit(as.numeric(x)))}))
}))
row.names(combined_proximity) <- row.names(all_proximity[[1]])
Heatmap(na.omit(combined_proximity), cluster_columns = FALSE, column_title = "Metaprograms Proximity",
row_names_gp = grid::gpar(fontsize = 5), name = "proximity", show_row_names = T, show_row_dend = F)
# regional comp downstream significant ---------------------------------------------------
spots_numv1 <-sapply(samp_list, function(smp){
samp_df <- readRDS('test.rds')
return(nrow(samp_df))
})
names(spots_numv1) <- sapply(str_split(names(spots_numv1), "\."), function(x){return(x[1])})
spots_num <-sapply(samp_list, function(smp){
samp_df <- readRDS('test.rds')
pairs_n <- sapply(c(1:ncol(pairs)),function(p){
p_table <- samp_df[samp_df$spot_type_meta_new %in% c(pairs[,p]),]
return(nrow(p_table))
})
return(pairs_n)
})
colnames(spots_num) <- sapply(str_split(colnames(spots_num), "\."), function(x){return(x[1])})
proximity_bin <- lapply(c(1:26),function(i){
r1 = all_proximity[[i]]
r2 = all_proximity_rand[[i]]
n_df = as.data.frame(spots_num[,names(all_proximity)[i]])
n_df <- cbind(n_df, rep(n_df,14))
fisher = ((0.5*log((1 r1)/(1-r1)))-(0.5*log((1 r2)/(1-r2))))/((1/(n_df-3)) (1/(n_df-3)))^0.5
p.value = (2*(1-pnorm(abs(as.matrix(fisher)))))
colnames(p.value) <- as.character(1:15)
bin_pval <- ifelse(p.value < 0.0000000001,1,NA)
# bin_pval <- ifelse(p.value < 0.00001,1,0)
rbin <- r1*bin_pval
return(rbin)
})
names(proximity_bin) <- names(all_proximity)
生活很好,有你更好