单细胞数据复现-肺癌文章代码复现4

2022-05-18 20:31:12 浏览数 (1)

单细胞数据复现-肺癌文章代码复现1https://cloud.tencent.com/developer/article/1992648

单细胞数据复现-肺癌文章代码复现2https://cloud.tencent.com/developer/article/1995619

单细胞数据复现-肺癌文章代码复现3https://cloud.tencent.com/developer/article/1996043

前面是主要对epi细胞进行的基本的分析。

选取癌症的样本进行分析

代码语言:txt复制
##subset抓取部分的数据集的内容
epi_tumor <- subset(subset(epi_anno, subset = cluster_type == "Tumor"), subset = tissue_type == "Tumor")
epi_tumor <- ScaleData(epi_tumor)

###tumor cell marker genes

Idents(epi_tumor) <- epi_tumor@meta.data$patient_id
markers <- FindAllMarkers(epi_tumor, only.pos = T, min.pct = 0.25, min.diff.pct = 0.25)
##摘取top基因,更改wt = avg_logFC,先查看markers gene的参数
top_TC_markers <- markers %>% group_by(cluster) %>% top_n(10, wt = avg_log2FC)
##这个颜色相对于比seurat原包的颜色较为好看
DoHeatmap(epi_tumor, features = top_TC_markers$gene, group.by = "patient_id", draw.lines = F, group.colors = use_colors)  
  scale_fill_viridis()
ggsave2("HeatMap_Tumor.pdf", path = "output/fig2", width = 30, height = 30, units = "cm")
ggsave2("Fig2C.png", path = "../results", width = 30, height = 30, units = "cm")

分析有丝分裂的发生基因

代码语言:txt复制
mitotic_activity <- FetchData(epi_tumor, c("tissue_type", "cell_type_epi", "Phase", "sample_id")) %>%
  mutate(sample_id = factor(sample_id, levels = c("p034t", "p033t", "p032t", "p031t", "p030t", "p027t", "p024t", "p023t", "p019t", "p018t")))

ggplot(mitotic_activity, aes(x = sample_id, fill = Phase))  
  geom_bar(position = "fill", width = 0.75)  
  scale_fill_manual(values = use_colors)  
  coord_flip()
ggsave2("SuppFiqg3D.pdf", path = "../results", width = 12, height = 10, units = "cm")

progeny pathway scores

代码语言:txt复制
#clustered heatmap progeny scores
##gather做数据清洗的

progeny_scores <- as.data.frame(t(GetAssayData(epi_tumor, assay = "progeny", slot = "scale.data")))
progeny_scores$cell_id <- rownames(progeny_scores)
progeny_scores <- gather(progeny_scores, Pathway, Activity, -cell_id)

cells_clusters <- FetchData(epi_tumor, c("sample_id", "cluster_type")) %>% filter(str_detect(sample_id, "t"))
cells_clusters$cell_id <- rownames(cells_clusters)

##full_join并集;inner_join交集
progeny_scores <- inner_join(progeny_scores, cells_clusters)

summarized_progeny_scores <- progeny_scores %>% 
  group_by(Pathway, sample_id) %>% 
  summarise(avg = mean(Activity), std = sd(Activity)) %>%
  pivot_wider(id_cols = Pathway, names_from = sample_id, values_from = avg) %>%
  column_to_rownames("Pathway") %>%
  as.matrix()
##这里画图的时候我发现还是ggsave相对好一些,有的时候捕捉到不到图片
pdf("../results/Fig2E.pdf", width = 6, height = 8)
heatmap.2(summarized_progeny_scores, trace = "none", density.info = "none", col = bluered(100))
dev.off()

correlation mutational status ~ progeny scores

代码语言:javascript复制
summarized_progeny_scores_mutations <- summarized_progeny_scores %>%
  t() %>%
  as.data.frame()

##mutate()R语言中的函数用于在DATAframe中添加新变量
summarized_progeny_scores_mutations$Sample <- rownames(summarized_progeny_scores_mutations)
summarized_progeny_scores_mutations <- summarized_progeny_scores_mutations %>%
  mutate(KRAS_status = ifelse(Sample %in% c("p018t", "p023t", "p030t", "p031t", "p032t", "p033t"), "mutated", "wildtype")) %>%
  mutate(TP53_status = ifelse(Sample %in% c("p023t", "p027t"), "mutated", "wildtype")) %>%
  mutate(PIK3CA_status = ifelse(Sample %in% c("p031t"), "mutated", "wildtype")) %>%
  mutate(KRAS_status = factor(TP53_status, levels = c("wildtype", "mutated"))) %>%
  mutate(TP53_status = factor(TP53_status, levels = c("wildtype", "mutated")))

ggplot(summarized_progeny_scores_mutations, aes(x = KRAS_status, y = MAPK))  
  geom_boxplot()  
  ggtitle(paste0(t.test(formula = MAPK~KRAS_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_KRAS~MAPK.pdf", path = "../results", width = 8, height = 8, units = "cm")

ggplot(summarized_progeny_scores_mutations, aes(x = KRAS_status, y = EGFR))  
  geom_boxplot()  
  ggtitle(paste0(t.test(formula = EGFR~KRAS_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_KRAS~EGFR.pdf", path = "../results", width = 8, height = 8, units = "cm")

ggplot(summarized_progeny_scores_mutations, aes(x = TP53_status, y = p53))  
  geom_boxplot()  
  ggtitle(paste0(t.test(formula = p53~TP53_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_p53~TP53.pdf", path = "../results", width = 8, height = 8, units = "cm")

rerun PCA

代码语言:javascript复制
// epi_pca <- epi_tumor

epi_pca <- RunPCA(epi_pca)

DimPlot(epi_pca, reduction = "pca", group.by = "patient_id", dims = c(1,2))
DimPlot(epi_pca, reduction = "pca", group.by = "patient_id", dims = c(3,4))

DimPlot(epi_pca, reduction = "pca", group.by = "histo_subtype")

DimHeatmap(epi_pca, dims = 1, cells = 1000, balanced = T, fast = F, nfeatures = 60)  
  scale_fill_viridis()
#ggsave2("DimHeatmap_epitumor_PC1.pdf", path = "output/fig2", width = 10, height = 20, units = "cm")
ggsave2("SuppFig3F.png", path = "../results", width = 10, height = 20, units = "cm")


### low-dimensional UMAPs

DimPlot(epi_pca, group.by = "patient_id")
DimPlot(epi_pca, group.by = "histo_subtype")

##可以用于后续的服务器R文件的保存
for (i in c(4, 6, 8)){
  umaptest <- RunUMAP(epi_pca, dims = 1:i, verbose = F)
  print(DimPlot(umaptest, reduction = "umap", group.by = "patient_id", cols = use_colors)   labs(title = paste0(i, " dimensions"))   coord_fixed())
  ggsave2(paste0("SuppFig3E_", i, "dimensions_patient.png"), path = "../results", width = 15, height = 10, units = "cm")
  print(DimPlot(umaptest, reduction = "umap", group.by = "histo_subtype", cols = use_colors)   labs(title = paste0(i, " dimensions"))   coord_fixed())
  ggsave2(paste0("SuppFig3E_", i, "dimensions_histo.png"), path = "../results", width = 15, height = 10, units = "cm")
  print(FeaturePlot(umaptest, features = "PC_1", order = T, cols = viridis(10))   labs(title = paste0(i, " dimensions"))   coord_fixed())
  ggsave2(paste0("SuppFig3E_", i, "dimensions_PC1.png"), path = "../results", width = 15, height = 10, units = "cm")
  remove(umaptest)
}


### bin cells along differentation gradients

# tumor cell signatures "alveolar/club-like" = tumor_signature, "undifferentiated" = anti_tumor_signature

epi_pca <- AddModuleScore(epi_pca, features = list(c("SCGB3A1", "PIGR", "NAPSA", "C4BPA", "SCGB3A2", "HLA-DRA", "CD74", "ADGRF5", "C16orf89", "FOLR1", "SELENBP1", "HLA-DRB1", "ID4", "MGP", "AQP3", "CA2", "LHX9", "HLA-DPB1", "FMO5", "GKN2", "C5", "MUC1", "NPC2", "RNASE1", "PIK3C2G", "SFTA2", "SLC34A2", "HLA-DPA1", "FGFR3", "PGC")), name = "tumor_signature")
epi_pca <- AddModuleScore(epi_pca, features = list(c("DSG2", "CAMK2N1", "FAM3C", "KRT7", "IFI27", "SLC2A1", "MARCKS", "PLAU", "AHNAK2", "PERP", "S100A4", "KRT19", "COL6A1", "UACA", "COL17A1", "CDA", "TPM2", "S100A16", "KRT8", "PRSS23", "DST", "LAMC2", "S100P", "PRSS3", "LAMA3", "DSP", "ITGA3", "MDK", "FAM83A", "ITGB4")), name = "anti_tumor_signature")


epi_tumor_data <- FetchData(epi_pca, c(colnames(epi_pca@meta.data), paste0("PC_", 1:10)))

progeny_scores_data <- as.data.frame(t(GetAssayData(epi_tumor, assay = "progeny", slot = "scale.data")))
progeny_scores_data$cell_id <- rownames(progeny_scores_data)

epi_tumor_data <- full_join(epi_tumor_data, progeny_scores_data, by = "cell_id")


##rownames:接受参数(向量),类矩阵的对象
##cut_number:切割函数
progeny_names <- rownames(epi_pca@assays$progeny)

epi_tumor_data$bin <- cut_number(epi_tumor_data$PC_1, n = 10, labels = c(1:10))

ggplot(epi_tumor_data, mapping = aes(x = bin))  
  geom_bar()

# patients along PC1

ggplot(epi_tumor_data, mapping = aes(x = bin, fill = factor(patient_id, levels = c("p032", "p018", "p019", "p024", "p031", "p030", "p033", "p023", "p027", "p034"))))  
  geom_bar()  
  theme(legend.title = element_blank())  
  scale_fill_manual(values = use_colors)
ggsave2("SuppFig3G.pdf", path = "../results", width = 30, height = 30, units = "cm")

# histological subtypes along PC1

ggplot(epi_tumor_data, mapping = aes(x = bin, fill = factor(histo_subtype, levels = c("lepidic", "acinar", "mucinuous (papillary)", "(micro)papillary", "solid", "sarcomatoid"))))  
  geom_bar()  
  theme(legend.title = element_blank())  
  scale_fill_manual(values = use_colors)
ggsave2("Fig2F.pdf", path = "../results", width = 30, height = 30, units = "cm")

# cell cycle phase along PC1

ggplot(epi_tumor_data, mapping = aes(x = bin, fill = Phase))  
  geom_bar()  
  scale_fill_manual(values = use_colors)
ggsave2("SuppFig3H.pdf", path = "../results", width = 30, height = 30, units = "cm")

# normal cell type signatures along PC1
##猖狂数据互换函数pivot_longer
epi_tumor_data %>%
  select(c(bin, VB_Basal.11, VB_Basal.22, VB_Ciliated.13, VB_Ciliated.24, VB_Club5, VB_Goblet.26, VB_Goblet.17, VB_Ionocytes8, VB_Type.2.alveolar9, VB_Type.1.alveolar10)) %>%
  pivot_longer(cols = c(VB_Basal.11, VB_Basal.22, VB_Ciliated.13, VB_Ciliated.24, VB_Club5, VB_Goblet.26, VB_Goblet.17, VB_Ionocytes8, VB_Type.2.alveolar9, VB_Type.1.alveolar10), names_to = "cell_type") %>%
  group_by(bin, cell_type) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  ggplot(aes(x = as.numeric(bin), y = mean, color = cell_type))  
  geom_line(size = 1)  
  scale_color_brewer(type = "qual", palette = "Paired")  
  scale_x_continuous(breaks = c(1:20))
ggsave2("Fig2G.pdf", path = "../results", width = 30, height = 30, units = "cm")

# progeny pathway scores along PC1

getcolors <- colorRampPalette(brewer.pal(14, "Dark2"))

epi_tumor_data %>%
  select(bin, progeny_names) %>%
  pivot_longer(cols = progeny_names, names_to = "progeny") %>%
  group_by(bin, progeny) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  ggplot(aes(x = as.numeric(bin), y = mean, color = progeny))  
  geom_line(size = 1)  
  scale_color_manual(values = getcolors(14))  
  scale_x_continuous(breaks = c(1:20))
ggsave2("Fig2H.pdf", path = "../results", width = 30, height = 30, units = "cm")

# tumor cell signatures along PC1

epi_tumor_data %>%
  select(bin, tumor_signature1, anti_tumor_signature1) %>%
  pivot_longer(cols = c(tumor_signature1, anti_tumor_signature1), names_to = "genes") %>%
  group_by(bin, genes) %>%
  summarise(mean = mean(value), sd = sd(value)) %>%
  ggplot(aes(x = as.numeric(bin), y = mean, color = genes))  
  geom_line(size = 1)  
  geom_errorbar(aes(ymin=mean-sd, ymax=mean sd), width = 0.3)  
  scale_x_continuous(breaks = c(1:20))
ggsave2("SuppFig3I.pdf", path = "../results", width = 30, height = 30, units = "cm")

0 人点赞