单细胞数据复现-肺癌文章代码复现8

2022-06-17 19:23:22 浏览数 (1)

单细胞数据复现-肺癌文章代码复现1:https://cloud.tencent.com/developer/article/1992648

单细胞数据复现-肺癌文章代码复现2:https://cloud.tencent.com/developer/article/1995619

单细胞数据复现-肺癌文章代码复现3:https://cloud.tencent.com/developer/article/1996043

单细胞数据复现-肺癌文章代码复现4:https://cloud.tencent.com/developer/article/2006654

单细胞数据复现-肺癌文章代码复现5:https://cloud.tencent.com/developer/article/2008487

单细胞数据复现-肺癌文章代码复现6:https://cloud.tencent.com/developer/article/2008704

单细胞数据复现-肺癌文章代码复现7:https://cloud.tencent.com/developer/article/2019634

前面的教程先对数据进行了降维处理,从中选出比较重要的三个亚群,并对这些亚群做了进一步细分;接下来,文章开始对各亚群细胞之间的相关性进行分析。

load libraries

代码语言:R
library(ggplot2)
library(Seurat)
library(dplyr)
library(tidyr)
library(cowplot)
library(gplots)
library(RColorBrewer)
library(plotrix)
library(corrplot)
library(pcaMethods)
library(readr)

颜色配置的加载。

代码语言:R
# Color scheme ----
# Named character vector mapping a label to a color (an R color name or a hex
# code). The entries are grouped below for readability only; c() flattens the
# nested vectors, so the result is one flat named vector in the order written.
use_colors <- c(
  # Tissue of origin.
  c(Tumor = "brown2", Normal = "deepskyblue2"),
  # Presumably cell-cycle phase labels -- TODO confirm against the metadata.
  c(G1 = "#46ACC8", G2M = "#E58601", S = "#B40F20"),
  # Main cell compartments.
  c(Epithelial = "seagreen", Immune = "darkgoldenrod2", Stromal = "steelblue"),
  # Presumably patient identifiers -- TODO confirm against patient_id values.
  c(
    p018 = "#E2D200", p019 = "#46ACC8", p023 = "#E58601", p024 = "#B40F20",
    p027 = "#0B775E", p028 = "#E1BD6D", p029 = "#35274A", p030 = "#F2300F",
    p031 = "#7294D4", p032 = "#5B1A18", p033 = "#9C964A", p034 = "#FD6467"
  ),
  # Presumably histological growth-pattern labels -- TODO confirm.
  # NOTE(review): "mucinuous" looks like a misspelling of "mucinous", but the
  # key has to match the label stored in the data, so it is kept verbatim.
  c(
    lepidic = "#0B775E",
    acinar = "#74A089",
    `mucinuous (papillary)` = "#E2D200",
    `(micro)papillary` = "#CEAB07",
    solid = "#B40F20",
    sarcomatoid = "#5B1A18"
  )
)

每个亚群的数据加载

代码语言:R
### Load data and subsetting ----
# Epithelial object that was saved as an RDS file earlier in the workflow.
epi_anno <- readRDS("seurat_objects/epi_anno.RDS")

# Keep cells whose cluster_type is "Tumor", then of those keep the cells whose
# tissue_type is "Tumor" (two successive subsets, applied in that order).
epi_tumor <- epi_anno %>%
  subset(subset = cluster_type == "Tumor") %>%
  subset(subset = tissue_type == "Tumor")

# Re-run ScaleData() on the tumor-only subset (this is scaling, not
# normalisation).
epi_tumor <- ScaleData(epi_tumor)

# The PCA result is stored in a separate object, epi_pca; epi_tumor itself is
# left without the new reduction.
epi_pca <- RunPCA(epi_tumor)
# Immune object that was saved as an RDS file earlier in the workflow.
imm_anno <- readRDS("seurat_objects/imm_anno.RDS")

# cell_type_imm labels that make up the imm_myelo subset, in level order.
myeloid_types <- c(
  "Alveolar_Macrophages1",
  "Alveolar_Macrophages2",
  "Alveolar_Macrophages3",
  "CD14_Macrophages1",
  "CD14_Macrophages2",
  "CD14_Macrophages3",
  "CD14_Macrophages4",
  "CD14_Macrophages5",
  "Macrophages_Proliferating",
  "Monocytes",
  "Myeloid_Dendritic",
  "Plasmacytoid_Dendritic",
  "Mast"
)

# cell_type_imm labels that make up the imm_lympho subset, in level order.
lymphoid_types <- c(
  "T_conv1",
  "T_conv2",
  "T_reg",
  "T_CD8_1",
  "T_CD8_2",
  "T_CD8_3",
  "T_CD8_Proliferating",
  "NK_cells",
  "B_cells",
  "Plasma"
)

# Turn the annotation into an ordered factor: myeloid labels first, lymphoid
# labels after. Any value not listed in the levels becomes NA.
imm_anno@meta.data$cell_type_imm <- ordered(
  imm_anno@meta.data$cell_type_imm,
  levels = c(myeloid_types, lymphoid_types)
)

# Split the immune object into its lymphoid and myeloid parts.
imm_lympho <- subset(imm_anno, subset = cell_type_imm %in% lymphoid_types)

imm_myelo <- subset(imm_anno, subset = cell_type_imm %in% myeloid_types)

# Per-cell variables pulled with FetchData(); these are annotation columns,
# not gene expression values.
imm_meta_vars <- c("tissue_type", "cell_type_imm", "sample_id", "patient_id")

# One row per cell; tissue_type becomes a factor with "Tumor" as first level.
lympho_counts <- FetchData(imm_lympho, vars = imm_meta_vars) %>%
  mutate(tissue_type = factor(tissue_type, levels = c("Tumor", "Normal")))

myelo_counts <- FetchData(imm_myelo, vars = imm_meta_vars) %>%
  mutate(tissue_type = factor(tissue_type, levels = c("Tumor", "Normal")))
# Stromal object that was saved as an RDS file earlier in the workflow.
str_anno <- readRDS("seurat_objects/str_anno.RDS")

# cell_type_str labels that make up the str_endo subset, in level order.
endothelial_types <- c(
  "Endothelial1",
  "Endothelial2",
  "Endothelial3",
  "Endothelial4",
  "Endothelial5",
  "Endothelial6",
  "Endothelial7",
  "Lymphaticendothelial"
)

# cell_type_str labels that make up the str_fibro subset, in level order.
fibro_types <- c(
  "Fibroblast1",
  "Fibroblast2",
  "Myofibroblast1",
  "Myofibroblast2",
  "Smoothmuscle1",
  "Smoothmuscle2",
  "Mesothelial"
)

# Turn the annotation into an (unordered) factor: endothelial labels first,
# the remaining stromal labels after. Any unlisted value becomes NA.
str_anno@meta.data$cell_type_str <- factor(
  str_anno@meta.data$cell_type_str,
  levels = c(endothelial_types, fibro_types)
)

# Split the stromal object into the endothelial part and the remaining
# (fibroblast, myofibroblast, smooth muscle, mesothelial) part.
str_endo <- subset(str_anno, subset = cell_type_str %in% endothelial_types)

str_fibro <- subset(str_anno, subset = cell_type_str %in% fibro_types)

# Per-cell variables pulled with FetchData(); these are annotation columns,
# not gene expression values.
str_meta_vars <- c("tissue_type", "cell_type_str", "sample_id", "patient_id")

# One row per cell; tissue_type becomes a factor with "Tumor" as first level.
endo_counts <- FetchData(str_endo, vars = str_meta_vars) %>%
  mutate(tissue_type = factor(tissue_type, levels = c("Tumor", "Normal")))

fibro_counts <- FetchData(str_fibro, vars = str_meta_vars) %>%
  mutate(tissue_type = factor(tissue_type, levels = c("Tumor", "Normal")))

count immune and stromal cells

代码语言:R
# Per-patient cell-type fractions in tumor samples, in wide format.
#
# Steps, for one per-cell annotation table:
#   1. keep only the rows whose tissue_type is "Tumor";
#   2. count cells per (cell type, patient);
#   3. divide each count by that patient's total within this table, so the
#      fractions of one patient sum to 1 inside the table;
#   4. reshape to one row per patient and one column per cell type.
#
# @param counts Data frame with one row per cell and at least the columns
#   tissue_type, patient_id and the cell-type column.
# @param cell_type Unquoted name of the cell-type column in `counts`.
# @return An ungrouped tibble with patient_id plus one fraction column per
#   cell type observed in tumor tissue. A cell type that a patient lacks is NA
#   here; the NAs are filled with 0 after the tables are joined below.
tumor_fractions_wide <- function(counts, cell_type) {
  counts %>%
    dplyr::filter(tissue_type == "Tumor") %>%
    dplyr::count({{ cell_type }}, patient_id) %>%
    group_by(patient_id) %>%
    mutate(n_rel = n / sum(n)) %>%
    # Drop the grouping once the per-patient fraction is computed, so the
    # result (and everything joined from it) does not stay grouped row by row.
    ungroup() %>%
    pivot_wider(
      id_cols = patient_id,
      names_from = {{ cell_type }},
      values_from = n_rel
    )
}

# One wide table per lineage; the variable names are the ones used so far.
myelo_counts_rel <- tumor_fractions_wide(myelo_counts, cell_type_imm)

lympho_counts_rel <- tumor_fractions_wide(lympho_counts, cell_type_imm)

fibro_counts_rel <- tumor_fractions_wide(fibro_counts, cell_type_str)

endo_counts_rel <- tumor_fractions_wide(endo_counts, cell_type_str)

# Combine the four tables on patient_id. full_join() keeps a patient even when
# one of the tables has no tumor cells for that patient. The join order
# (myeloid, lymphoid, endothelial, fibroblast-like) fixes the column order.
cell_counts_rel <- myelo_counts_rel %>%
  full_join(lympho_counts_rel, by = "patient_id") %>%
  full_join(endo_counts_rel, by = "patient_id") %>%
  full_join(fibro_counts_rel, by = "patient_id")

# A missing value means no cell of that type was counted for that patient, so
# the fraction is set to 0.
cell_counts_rel[is.na(cell_counts_rel)] <- 0

0 人点赞