单细胞数据复现-肺癌文章代码复现1: https://cloud.tencent.com/developer/article/1992648
单细胞数据复现-肺癌文章代码复现2: https://cloud.tencent.com/developer/article/1995619
单细胞数据复现-肺癌文章代码复现3: https://cloud.tencent.com/developer/article/1996043
单细胞数据复现-肺癌文章代码复现4: https://cloud.tencent.com/developer/article/2006654
单细胞数据复现-肺癌文章代码复现5: https://cloud.tencent.com/developer/article/2008487
单细胞数据复现-肺癌文章代码复现6: https://cloud.tencent.com/developer/article/2008704
单细胞数据复现-肺癌文章代码复现7: https://cloud.tencent.com/developer/article/2019634
前面的教程对数据进行了降维处理,选出了比较重要的三个亚群,并对这些亚群做了进一步细分;接下来文章开始对各个亚群中的细胞进行相关性分析。
load libraries
library(ggplot2)
library(Seurat)
library(dplyr)
library(tidyr)
library(cowplot)
library(gplots)
library(RColorBrewer)
library(plotrix)
library(corrplot)
library(pcaMethods)
library(readr)
颜色配置的加载。
# Color scheme ----
# Named character vector mapping a label to the color used for it in plots.
# The web-page "copy code" residue that was fused onto the first line of this
# snippet has been removed: R evaluated it as an expression and stopped with an
# error before `use_colors` was ever defined.
# NOTE(review): the group headings below are inferred from the labels only --
# confirm against the metadata columns these colors are applied to.
use_colors <- c(
  # tissue of origin (presumably)
  Tumor = "brown2",
  Normal = "deepskyblue2",
  # cell-cycle phase (presumably)
  G1 = "#46ACC8",
  G2M = "#E58601",
  S = "#B40F20",
  # main cell compartments (presumably)
  Epithelial = "seagreen",
  Immune = "darkgoldenrod2",
  Stromal = "steelblue",
  # patient identifiers (presumably)
  p018 = "#E2D200",
  p019 = "#46ACC8",
  p023 = "#E58601",
  p024 = "#B40F20",
  p027 = "#0B775E",
  p028 = "#E1BD6D",
  p029 = "#35274A",
  p030 = "#F2300F",
  p031 = "#7294D4",
  p032 = "#5B1A18",
  p033 = "#9C964A",
  p034 = "#FD6467",
  # histological growth patterns (presumably). The spelling of the names is
  # kept exactly as in the original, since they have to match the labels
  # stored in the data -- TODO confirm.
  lepidic = "#0B775E",
  acinar = "#74A089",
  `mucinuous (papillary)` = "#E2D200",
  `(micro)papillary` = "#CEAB07",
  solid = "#B40F20",
  sarcomatoid = "#5B1A18"
)
每个亚群的数据加载
### Load data and subsetting ----
# The web-page "copy code" residue that preceded this header in the original
# snippet has been removed; R evaluated it as code and raised an error.

# Annotated epithelial object saved as RDS by an earlier step.
epi_anno <- readRDS("seurat_objects/epi_anno.RDS")

# Keep only cells whose cluster_type is "Tumor" AND whose tissue_type is
# "Tumor". Both conditions are applied in a single subset() call instead of
# two nested ones, so the object is only subset once.
epi_tumor <- subset(
  epi_anno,
  subset = cluster_type == "Tumor" & tissue_type == "Tumor"
)

# Re-run ScaleData() on the reduced cell set (this is Seurat's scaling step,
# not normalization).
epi_tumor <- ScaleData(epi_tumor)

# PCA on the tumor cells; the result is stored in a separate object so that
# `epi_tumor` itself is left without this PCA run.
epi_pca <- RunPCA(epi_tumor)
## Immune compartment ----
# Annotated immune object saved as RDS by an earlier step.
imm_anno <- readRDS("seurat_objects/imm_anno.RDS")

# Cell-type labels of the two immune subsets. Concatenated (myeloid first,
# lymphoid second) they also define the level order of `cell_type_imm`.
myelo_types <- c(
  "Alveolar_Macrophages1",
  "Alveolar_Macrophages2",
  "Alveolar_Macrophages3",
  "CD14_Macrophages1",
  "CD14_Macrophages2",
  "CD14_Macrophages3",
  "CD14_Macrophages4",
  "CD14_Macrophages5",
  "Macrophages_Proliferating",
  "Monocytes",
  "Myeloid_Dendritic",
  "Plasmacytoid_Dendritic",
  "Mast"
)
lympho_types <- c(
  "T_conv1",
  "T_conv2",
  "T_reg",
  "T_CD8_1",
  "T_CD8_2",
  "T_CD8_3",
  "T_CD8_Proliferating",
  "NK_cells",
  "B_cells",
  "Plasma"
)

# Turn the annotation stored in the metadata into an ordered factor with the
# level order fixed above.
imm_anno@meta.data$cell_type_imm <- factor(
  imm_anno@meta.data$cell_type_imm,
  levels = c(myelo_types, lympho_types),
  ordered = TRUE
)

# Split the immune object into its lymphoid and myeloid parts.
imm_lympho <- subset(imm_anno, subset = cell_type_imm %in% lympho_types)
imm_myelo <- subset(imm_anno, subset = cell_type_imm %in% myelo_types)

# Per-cell metadata used for counting. FetchData() is only asked for metadata
# columns here, not for gene expression values.
imm_meta_vars <- c("tissue_type", "cell_type_imm", "sample_id", "patient_id")
tissue_levels <- c("Tumor", "Normal")

lympho_counts <- FetchData(imm_lympho, vars = imm_meta_vars)
lympho_counts <- lympho_counts %>%
  mutate(tissue_type = factor(tissue_type, levels = tissue_levels))

myelo_counts <- FetchData(imm_myelo, vars = imm_meta_vars)
myelo_counts <- myelo_counts %>%
  mutate(tissue_type = factor(tissue_type, levels = tissue_levels))
## Stromal compartment ----
# Annotated stromal object saved as RDS by an earlier step.
str_anno <- readRDS("seurat_objects/str_anno.RDS")

# Cell-type labels of the two stromal subsets. Concatenated (endothelial
# first, then the fibroblast group) they also define the level order of
# `cell_type_str`.
endo_types <- c(
  "Endothelial1",
  "Endothelial2",
  "Endothelial3",
  "Endothelial4",
  "Endothelial5",
  "Endothelial6",
  "Endothelial7",
  "Lymphaticendothelial"
)
fibro_types <- c(
  "Fibroblast1",
  "Fibroblast2",
  "Myofibroblast1",
  "Myofibroblast2",
  "Smoothmuscle1",
  "Smoothmuscle2",
  "Mesothelial"
)

# Turn the annotation stored in the metadata into a factor with the level
# order fixed above (an unordered factor, unlike the immune annotation).
str_anno@meta.data$cell_type_str <- factor(
  str_anno@meta.data$cell_type_str,
  levels = c(endo_types, fibro_types)
)

# Split the stromal object into the endothelial part and the fibroblast group
# (fibroblasts, myofibroblasts, smooth muscle and mesothelial labels).
str_endo <- subset(str_anno, subset = cell_type_str %in% endo_types)
str_fibro <- subset(str_anno, subset = cell_type_str %in% fibro_types)

# Per-cell metadata used for counting. FetchData() is only asked for metadata
# columns here, not for gene expression values.
str_meta_vars <- c("tissue_type", "cell_type_str", "sample_id", "patient_id")
str_tissue_levels <- c("Tumor", "Normal")

endo_counts <- FetchData(str_endo, vars = str_meta_vars)
endo_counts <- endo_counts %>%
  mutate(tissue_type = factor(tissue_type, levels = str_tissue_levels))

fibro_counts <- FetchData(str_fibro, vars = str_meta_vars)
fibro_counts <- fibro_counts %>%
  mutate(tissue_type = factor(tissue_type, levels = str_tissue_levels))
count immune and stromal cells
## Relative cell-type composition per patient (tumor tissue only) ----
# The web-page "copy code" residue that preceded the first comment of the
# original snippet has been removed; R evaluated it as code and raised an
# error. The pipeline that was repeated four times is now a single helper.

# Fraction of each cell type among one patient's tumor-tissue cells.
#
# counts        data frame with the columns `tissue_type`, `patient_id` and
#               the cell-type column named by `cell_type_col`
# cell_type_col unquoted name of the cell-type column
#
# Returns a tibble with one row per patient and one column per cell type.
# Cell types that were not observed for a patient come out as NA.
tumor_fractions_by_patient <- function(counts, cell_type_col) {
  counts %>%
    # keep the tumor-tissue cells (rows with other tissue types are dropped)
    filter(tissue_type == "Tumor") %>%
    dplyr::count({{ cell_type_col }}, patient_id) %>%
    group_by(patient_id) %>%
    # share of each cell type within the patient
    mutate(n_rel = n / sum(n)) %>%
    # drop the grouping so it does not leak into the joins below
    ungroup() %>%
    # long -> wide: one column per cell type
    pivot_wider(
      id_cols = patient_id,
      names_from = {{ cell_type_col }},
      values_from = n_rel
    )
}

myelo_counts_rel <- tumor_fractions_by_patient(myelo_counts, cell_type_imm)
lympho_counts_rel <- tumor_fractions_by_patient(lympho_counts, cell_type_imm)
fibro_counts_rel <- tumor_fractions_by_patient(fibro_counts, cell_type_str)
endo_counts_rel <- tumor_fractions_by_patient(endo_counts, cell_type_str)

# Combine the four tables; every join uses the same key, so they chain.
cell_counts_rel <- myelo_counts_rel %>%
  full_join(lympho_counts_rel, by = "patient_id") %>%
  full_join(endo_counts_rel, by = "patient_id") %>%
  full_join(fibro_counts_rel, by = "patient_id")

# A cell type (or a whole table) missing for a patient means a fraction of 0.
cell_counts_rel[is.na(cell_counts_rel)] <- 0