空间转录组合作项目分析示例(四)

2024-09-09 16:28:46 浏览数 (1)

作者,Evil Genius

上一篇我们完成了空间注释之后,立马就要来到我们的下一步分析,细胞的空间位置关系。也是空间转录组必备的分析之一。

其中包括不同细胞类型之间的空间关系

以及相同细胞类型的空间位置关系

课程上分享的代码均可实现,但是我们不走回头路,分享就分享新的。

首先定义分析空间细胞类型共定位的函数

代码语言:javascript复制
###篇幅限制,无法展示

大家还知道不同平台共定位需要考虑的邻居数么?

主脚本

代码语言:javascript复制
library(stringr)
library(cocor)
library(dplyr)
library(data.table)
library(stats)
library(scalop)
library(tibble)
library(ggrepel)
library(tidyr)
library(scales)
library(matrixStats)

# Run functions first

# co-localization  ---------------------------------------------------------
#### log-transform expression matrices 
m <- readRDS("test.rds")
m <- m[-grep("^MT-|^RPL|^RPS", rownames(m)), ]
if(min(colSums(m)) == 0){m <- m[, colSums(m) != 0]}
 scaling_factor <- 1000000/colSums(m)
 m_CPM <- sweep(m, MARGIN = 2, STATS = scaling_factor, FUN = "*")
 m_loged <- log2(1   (m_CPM/10))
 # removing genes with zero variance across all cells
 var_filter <- apply(m_loged, 1, var)
 m_proc <- m_loged[var_filter != 0, ]
 # filtering out lowly expressed genes
 exp_genes <- rownames(m_proc)[(rowMeans(m_proc) > 0.4)]
 m_proc <- m_proc[exp_genes, ]
 # output to a list 
 per_sample_mat[[i]] <- m_proc
 names(per_sample_mat)[i] <- sample_ls[[i]]
 rm(path, files, m, m_CPM, m_loged, var_filter, exp_genes, m_proc)

#### co-localiztion
results<-list()
for(i in seq_along(names(all_decon))) {
  decon <- all_decon[[sample_ls[i]]]
  spot_list <- lapply(c(1:500),function(x){ #num. of shuffled matrices
    all_row <- sapply(c(1:dim(decon)[2]),function(c){
      new_row <- sample(c(1:length(row.names(decon))), length(row.names(decon)), replace = FALSE)
      return(new_row) 
    })
    new_mat <- sapply(c(1:dim(decon)[2]),function(c){
      sh_row <- decon[,c][all_row[,c]]#generate many deconv matrices by shuffling cell types
      return(sh_row)
    })
    colnames(new_mat) <- colnames(decon)
    return(new_mat)
  }) 
  
  #run colocal_mat_fun -- perform colocalization on the shuffled decon matrices
  mats_list<- lapply(spot_list, function(new_decon){
    colocal_mats<-colocal_mat(new_decon)
    return(colocal_mats)
  })
  mean_mat <- sapply(mats_list,function(x){ #expected colocalization calculated from shuffled decon matrices
    return(x[,"ab_comb"])
  })
  pairs_shuf_mean <- apply(mean_mat,1,mean)
  names(pairs_shuf_mean) <- mats_list[[1]][,"pairs"]
  rownames(mean_mat) <- mats_list[[1]][,"pairs"]
  
  obs_coloc <- colocal_mat(decon)
  
  r1 = obs_coloc$ab_comb #observed colocal
  r2 = pairs_shuf_mean #expected colocal
  
  n = dim(decon)[1] #num total spots
  
  fisher = ((0.5*log((1 r1)/(1-r1)))-(0.5*log((1 r2)/(1-r2))))/((1/(n-3)) (1/(n-3)))^0.5
  
  p.value = (2*(1-pnorm(abs(as.matrix(fisher)))))
  effect_size <- r1/r2 #observed/expected
  results[[i]]<-data.frame(pvalue = p.value,
                           effect_size = effect_size)
  
}
names(results) <- sample_ls

results2<-lapply(results, dplyr::add_rownames, 'pairs')
results2<- lapply(results2,function(x){
  as.matrix(x)
})
results2 <- lapply(results2, function(x){rownames(x) <- x[,1]; x})
df <- do.call("rbind", results2)

###volcano plot (all pairs, all samples)###
df->results_comb2
results_comb2<-as.data.frame(results_comb2)
results_comb2$pvalue<-as.numeric(results_comb2$pvalue)
results_comb2$effect_size<-as.numeric(results_comb2$effect_size)
results_comb2$sig <- "NO"
results_comb2$sig[results_comb2$effect_size >= 1.3 & results_comb2$pvalue <= 0.01] <- "enriched"
results_comb2$sig[results_comb2$effect_size <= 0.7 & results_comb2$pvalue <= 0.01] <- "depleted"
results_comb2$label <- NA
results_comb2$label[results_comb2$sig != "NO"] <- results_comb2$pairs[results_comb2$sig != "NO"]

ggplot(data=results_comb2, aes(x=effect_size, y=-log10(pvalue), col=sig, label=label))   
  geom_point()   
  theme_minimal()  
  geom_text_repel() 
  theme(text = element_text(size = 20))
代码语言:javascript复制
### Adjacency  --------------------------------------------------------------

mp_assign <- readRDS("test.rds", sep = "")

all_zones <- readRDS("final_zones.rds")

extend_metadata <- tibble()
generate_metadata <- sapply(c(1:length(sample_ls)), function(i){
  print(sample_ls[i])
  
  # load data
  spots_positions <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
 
row.names(spots_positions) <- spots_positions$V1
spots_clusters <- readRDS("test.rds", sep = "")
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes  
  
  
metadata <- tibble(Key = paste(sample_ls[i],spots_clusters$barcodes, sep="_"),
                     SpotID = spots_clusters$barcodes,
                     Sample = rep(sample_ls[i], nrow(spots_clusters)),
                     MPid = spots_clusters$spot_type,
                     array_row = spots_positions[spots_clusters$barcodes,"V3"],
                     array_col = spots_positions[spots_clusters$barcodes, "V4"],
                     pxl_in_rows = spots_positions[spots_clusters$barcodes, "V5"],
                     pxl_in_cols = spots_positions[spots_clusters$barcodes, "V6"],
                     Zone = as.character(all_zones[[i]][spots_clusters$barcodes]))
  
  extend_metadata <<- rbind.data.frame(extend_metadata, metadata)
})

set.seed(50)
neighbs_stats <- neighbor_spot_props(metadata = extend_metadata, 
                                     zone = "All", 
                                     #site = "All", 
                                     samples = "All",
                                     #zone_by = "EpiStroma", 
                                     n_cores = 30, 
                                     plot_perm_distr = TRUE, 
                                     n_perm = 10000, 
                                     filter_signif = TRUE, 
                                     zscore_thresh = 1)




# regional comp  (Previously run by server. Possible to run per sample below)----------------------------------------------------------

#!/usr/bin/env Rscript
#args = commandArgs(trailingOnly = TRUE)

#sname <- as.character(args[1])
#sname <- str_replace(sname, "r", "")
#print(paste("I got the samp right", sname))

sname <- "" #insert sample name 
gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
mp_assign <- readRDS('test.rds')
return(unique(mp_assign$spot_type_meta_new))
}))))

max_win_size <- 15
pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})

# load data
spots_positions <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
row.names(spots_positions) <- spots_positions$V1

spots_clusters <- readRDS('test.rds')
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes  

neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)

all_spots <- spots_clusters$barcodes

all_pval_windows <- sapply(c(1:max_win_size), function(win_size){
  proximity <- t(sapply(all_spots, function(spot){
    win_spots <- c(spot)
    sapply(c(1:win_size), function(i){
      win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
    })
    win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]))
    return(win_abund)
  }))
  
  old_clusters <- colnames(proximity)
  add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
  
  if (length(add_clusters) != 0){
    sapply(add_clusters,function(clust){
      proximity <<- cbind(proximity,rep(0,dim(proximity)[1]))
    })
    colnames(proximity)[c((length(old_clusters) 1):dim(proximity)[2])] <- add_clusters
  }
  
  proximity <- proximity[,sort(colnames(proximity))]
  
  is_one <- apply(proximity,1,function(ro){
    return(1 %in% ro )
  })
  
  proximity <- proximity[!is_one,]
  
  all_cor <- sapply(c(1:dim(pairs)[2]), function(j){
    pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]])
    return(pair_cor)
  })
  
  all_cor <- data.frame(pair_cors = all_cor)
  row.names(all_cor) <- pairs_names
  return(all_cor)
  
})
sample_proximity <- as.data.frame(all_pval_windows)
row.names(sample_proximity) <- pairs_names



# regional comp random (Previously run by server. Possible to run per sample below) ---------------------------------------------------

#!/usr/bin/env Rscript
#args = commandArgs(trailingOnly = TRUE)

#sname <- as.character(args[1])
#sname <- str_replace(sname, "r", "")

sname <- "" #insert sample name 
gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
 mp_assign <- readRDS('test.rds')
 return(unique(mp_assign$spot_type_meta_new))
}))))

max_win_size <- 15
pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
rand_num <- 500

# load data
spots_positions_orign <- read.csv("/test/outs/spatial/tissue_positions_list.csv", header = FALSE, stringsAsFactors = FALSE)
row.names(spots_positions_orign) <- spots_positions_orign$V1

spots_clusters <- readRDS("test.rds", sep = "")
spots_clusters <- na.omit(spots_clusters)
colnames(spots_clusters) <- c("barcodes", "spot_type")
row.names(spots_clusters)<- spots_clusters$barcodes  

all_rand <- lapply(c(1:rand_num),function(j){
  new_pos_all <- sample(spots_positions_orign$V1[spots_positions_orign$V2 != 0], length(spots_positions_orign$V1[spots_positions_orign$V2 != 0]), replace = FALSE)
  spots_positions <- spots_positions_orign
  spots_positions$V1[spots_positions$V2 != 0] <- new_pos_all
  
  neighbors_table <- prox_neighbors_table_func(spots_positions,spots_clusters)
  
  all_spots <- spots_clusters$barcodes
  
  
  all_pval_windows <- sapply(c(1:max_win_size), function(win_size){
    proximity <- t(sapply(all_spots, function(spot){
      win_spots <- c(spot)
      sapply(c(1:win_size), function(i){
        win_spots <<- unique(c(win_spots,unique(na.omit(as.character(neighbors_table[win_spots,])))))
      })
      win_abund <- table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots])/sum(table(spots_clusters$spot_type[spots_clusters$barcodes %in% win_spots]))
      return(win_abund)
    }))
    
    old_clusters <- colnames(proximity)
    add_clusters <- gen_clusters[!gen_clusters %in% old_clusters]
    
    if (length(add_clusters) != 0){
      sapply(add_clusters,function(clust){
        proximity <<- cbind(proximity,rep(0,dim(proximity)[1]))
      })
      colnames(proximity)[c((length(old_clusters) 1):dim(proximity)[2])] <- add_clusters
    }
    
    proximity <- proximity[,sort(colnames(proximity))]
    
    is_one <- apply(proximity,1,function(ro){
      return(1 %in% ro )
    })
    
    proximity <- proximity[!is_one,]
    
    all_cor <- sapply(c(1:dim(pairs)[2]), function(j){
      pair_cor <- cor(proximity[,pairs[1,j]], proximity[,pairs[2,j]])
      return(pair_cor)
    })
    
    all_cor <- data.frame(pair_cors = all_cor)
    row.names(all_cor) <- pairs_names
    return(all_cor)
    
  })
  sample_proximity <- as.data.frame(all_pval_windows)
  row.names(sample_proximity) <- pairs_names
  return(sample_proximity)
})
sample_mean_rand_prox <- Reduce(" ", all_rand) / length(all_rand)

sample_sd_rand_prox <- round(apply(array(unlist(all_rand), c(length(pairs_names), max_win_size, rand_num)), c(1,2), sd),4)



# regional comp downstream ------------------------------------------------

gen_clusters <- as.character(unique(unlist(sapply(c(1:26), function(i){
  mp_assign <- readRDS('test.rds')
  return(unique(mp_assign$spot_type_meta_new))
}))))

pairs <- combn(sort(gen_clusters),2)
pairs_names <- apply(pairs, 2, function(x){return(paste(x[1],x[2], sep = " "))})
all_zones <- readRDS("Spatial_coh_zones/spatial_zonesv3.rds")

all_proximity_list <- list.files("Spatial_coh_zones/proximity_samples/")

all_proximity <- lapply(all_proximity_list, function(prox){
  samp_prox <- readRDS('test.rds')
  return(samp_prox)
})
names(all_proximity) <- sapply(str_split(all_proximity_list, "_"), function(x){return(x[1])})

all_proximity_rand_list <- list.files("Spatial_coh_zones/proximity_rand_samples/")

all_proximity_rand <- lapply(all_proximity_rand_list, function(prox){
  samp_prox <- readRDS('test.rds')
  return(samp_prox)
})
names(all_proximity_rand) <- sapply(str_split(all_proximity_rand_list, "_"), function(x){return(x[1])})


combined_proximity <- t(sapply(c(1:length(pairs_names)), function(i){
  pair_prox <- sapply(all_proximity, function(x){
    pair_sample_df <- as.data.frame(x[i,c(1:15)])
    colnames(pair_sample_df) <- c(as.character(c(1:15)))
    return(pair_sample_df)
  })
  return(apply(pair_prox,1,function(x){mean(na.omit(as.numeric(x)))}))
}))

row.names(combined_proximity) <- row.names(all_proximity[[1]])



Heatmap(na.omit(combined_proximity), cluster_columns = FALSE, column_title = "Metaprograms Proximity", 
        row_names_gp = grid::gpar(fontsize = 5), name = "proximity", show_row_names = T, show_row_dend = F)
# regional comp downstream significant ---------------------------------------------------

spots_numv1 <-sapply(samp_list, function(smp){
  samp_df <- readRDS('test.rds')
  return(nrow(samp_df))
})
names(spots_numv1) <- sapply(str_split(names(spots_numv1), "\."), function(x){return(x[1])})

spots_num <-sapply(samp_list, function(smp){
samp_df <- readRDS('test.rds')
pairs_n <- sapply(c(1:ncol(pairs)),function(p){
p_table <- samp_df[samp_df$spot_type_meta_new %in% c(pairs[,p]),]
return(nrow(p_table))
  })
  return(pairs_n)
})
colnames(spots_num) <- sapply(str_split(colnames(spots_num), "\."), function(x){return(x[1])})



proximity_bin <- lapply(c(1:26),function(i){
  r1 = all_proximity[[i]]
  r2 = all_proximity_rand[[i]]
  
  n_df = as.data.frame(spots_num[,names(all_proximity)[i]])
  n_df <- cbind(n_df, rep(n_df,14))
  
  fisher = ((0.5*log((1 r1)/(1-r1)))-(0.5*log((1 r2)/(1-r2))))/((1/(n_df-3)) (1/(n_df-3)))^0.5
  
  p.value = (2*(1-pnorm(abs(as.matrix(fisher)))))
  colnames(p.value) <- as.character(1:15)
  bin_pval <- ifelse(p.value < 0.0000000001,1,NA)
  #  bin_pval <- ifelse(p.value < 0.00001,1,0)
  rbin <- r1*bin_pval
  return(rbin)
})
names(proximity_bin) <- names(all_proximity)

生活很好,有你更好

0 人点赞