单细胞数据复现-肺癌文章代码复现1https://cloud.tencent.com/developer/article/1992648
单细胞数据复现-肺癌文章代码复现2https://cloud.tencent.com/developer/article/1995619
单细胞数据复现-肺癌文章代码复现3https://cloud.tencent.com/developer/article/1996043
前面是主要对epi细胞进行的基本的分析。
选取癌症的样本进行分析
代码语言:txt复制##subset抓取部分的数据集的内容
epi_tumor <- subset(subset(epi_anno, subset = cluster_type == "Tumor"), subset = tissue_type == "Tumor")
epi_tumor <- ScaleData(epi_tumor)
###tumor cell marker genes
Idents(epi_tumor) <- epi_tumor@meta.data$patient_id
markers <- FindAllMarkers(epi_tumor, only.pos = T, min.pct = 0.25, min.diff.pct = 0.25)
##摘取top基因,更改wt = avg_logFC,先查看markers gene的参数
top_TC_markers <- markers %>% group_by(cluster) %>% top_n(10, wt = avg_log2FC)
##这个颜色相对于比seurat原包的颜色较为好看
DoHeatmap(epi_tumor, features = top_TC_markers$gene, group.by = "patient_id", draw.lines = F, group.colors = use_colors)
scale_fill_viridis()
ggsave2("HeatMap_Tumor.pdf", path = "output/fig2", width = 30, height = 30, units = "cm")
ggsave2("Fig2C.png", path = "../results", width = 30, height = 30, units = "cm")
分析有丝分裂的发生基因
代码语言:txt复制mitotic_activity <- FetchData(epi_tumor, c("tissue_type", "cell_type_epi", "Phase", "sample_id")) %>%
mutate(sample_id = factor(sample_id, levels = c("p034t", "p033t", "p032t", "p031t", "p030t", "p027t", "p024t", "p023t", "p019t", "p018t")))
ggplot(mitotic_activity, aes(x = sample_id, fill = Phase))
geom_bar(position = "fill", width = 0.75)
scale_fill_manual(values = use_colors)
coord_flip()
ggsave2("SuppFiqg3D.pdf", path = "../results", width = 12, height = 10, units = "cm")
progeny pathway scores
代码语言:txt复制#clustered heatmap progeny scores
##gather做数据清洗的
progeny_scores <- as.data.frame(t(GetAssayData(epi_tumor, assay = "progeny", slot = "scale.data")))
progeny_scores$cell_id <- rownames(progeny_scores)
progeny_scores <- gather(progeny_scores, Pathway, Activity, -cell_id)
cells_clusters <- FetchData(epi_tumor, c("sample_id", "cluster_type")) %>% filter(str_detect(sample_id, "t"))
cells_clusters$cell_id <- rownames(cells_clusters)
##full_join并集;inner_join交集
progeny_scores <- inner_join(progeny_scores, cells_clusters)
summarized_progeny_scores <- progeny_scores %>%
group_by(Pathway, sample_id) %>%
summarise(avg = mean(Activity), std = sd(Activity)) %>%
pivot_wider(id_cols = Pathway, names_from = sample_id, values_from = avg) %>%
column_to_rownames("Pathway") %>%
as.matrix()
##这里画图的时候我发现还是ggsave相对好一些,有的时候捕捉到不到图片
pdf("../results/Fig2E.pdf", width = 6, height = 8)
heatmap.2(summarized_progeny_scores, trace = "none", density.info = "none", col = bluered(100))
dev.off()
correlation mutational status ~ progeny scores
代码语言:javascript复制summarized_progeny_scores_mutations <- summarized_progeny_scores %>%
t() %>%
as.data.frame()
##mutate()R语言中的函数用于在DATAframe中添加新变量
summarized_progeny_scores_mutations$Sample <- rownames(summarized_progeny_scores_mutations)
summarized_progeny_scores_mutations <- summarized_progeny_scores_mutations %>%
mutate(KRAS_status = ifelse(Sample %in% c("p018t", "p023t", "p030t", "p031t", "p032t", "p033t"), "mutated", "wildtype")) %>%
mutate(TP53_status = ifelse(Sample %in% c("p023t", "p027t"), "mutated", "wildtype")) %>%
mutate(PIK3CA_status = ifelse(Sample %in% c("p031t"), "mutated", "wildtype")) %>%
mutate(KRAS_status = factor(TP53_status, levels = c("wildtype", "mutated"))) %>%
mutate(TP53_status = factor(TP53_status, levels = c("wildtype", "mutated")))
ggplot(summarized_progeny_scores_mutations, aes(x = KRAS_status, y = MAPK))
geom_boxplot()
ggtitle(paste0(t.test(formula = MAPK~KRAS_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_KRAS~MAPK.pdf", path = "../results", width = 8, height = 8, units = "cm")
ggplot(summarized_progeny_scores_mutations, aes(x = KRAS_status, y = EGFR))
geom_boxplot()
ggtitle(paste0(t.test(formula = EGFR~KRAS_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_KRAS~EGFR.pdf", path = "../results", width = 8, height = 8, units = "cm")
ggplot(summarized_progeny_scores_mutations, aes(x = TP53_status, y = p53))
geom_boxplot()
ggtitle(paste0(t.test(formula = p53~TP53_status, data = summarized_progeny_scores_mutations, alternative = "two.sided", paired = F)$p.value))
ggsave2("SuppFig3C_p53~TP53.pdf", path = "../results", width = 8, height = 8, units = "cm")
rerun PCA
代码语言:javascript复制// epi_pca <- epi_tumor
epi_pca <- RunPCA(epi_pca)
DimPlot(epi_pca, reduction = "pca", group.by = "patient_id", dims = c(1,2))
DimPlot(epi_pca, reduction = "pca", group.by = "patient_id", dims = c(3,4))
DimPlot(epi_pca, reduction = "pca", group.by = "histo_subtype")
DimHeatmap(epi_pca, dims = 1, cells = 1000, balanced = T, fast = F, nfeatures = 60)
scale_fill_viridis()
#ggsave2("DimHeatmap_epitumor_PC1.pdf", path = "output/fig2", width = 10, height = 20, units = "cm")
ggsave2("SuppFig3F.png", path = "../results", width = 10, height = 20, units = "cm")
### low-dimensional UMAPs
DimPlot(epi_pca, group.by = "patient_id")
DimPlot(epi_pca, group.by = "histo_subtype")
##可以用于后续的服务器R文件的保存
for (i in c(4, 6, 8)){
umaptest <- RunUMAP(epi_pca, dims = 1:i, verbose = F)
print(DimPlot(umaptest, reduction = "umap", group.by = "patient_id", cols = use_colors) labs(title = paste0(i, " dimensions")) coord_fixed())
ggsave2(paste0("SuppFig3E_", i, "dimensions_patient.png"), path = "../results", width = 15, height = 10, units = "cm")
print(DimPlot(umaptest, reduction = "umap", group.by = "histo_subtype", cols = use_colors) labs(title = paste0(i, " dimensions")) coord_fixed())
ggsave2(paste0("SuppFig3E_", i, "dimensions_histo.png"), path = "../results", width = 15, height = 10, units = "cm")
print(FeaturePlot(umaptest, features = "PC_1", order = T, cols = viridis(10)) labs(title = paste0(i, " dimensions")) coord_fixed())
ggsave2(paste0("SuppFig3E_", i, "dimensions_PC1.png"), path = "../results", width = 15, height = 10, units = "cm")
remove(umaptest)
}
### bin cells along differentation gradients
# tumor cell signatures "alveolar/club-like" = tumor_signature, "undifferentiated" = anti_tumor_signature
epi_pca <- AddModuleScore(epi_pca, features = list(c("SCGB3A1", "PIGR", "NAPSA", "C4BPA", "SCGB3A2", "HLA-DRA", "CD74", "ADGRF5", "C16orf89", "FOLR1", "SELENBP1", "HLA-DRB1", "ID4", "MGP", "AQP3", "CA2", "LHX9", "HLA-DPB1", "FMO5", "GKN2", "C5", "MUC1", "NPC2", "RNASE1", "PIK3C2G", "SFTA2", "SLC34A2", "HLA-DPA1", "FGFR3", "PGC")), name = "tumor_signature")
epi_pca <- AddModuleScore(epi_pca, features = list(c("DSG2", "CAMK2N1", "FAM3C", "KRT7", "IFI27", "SLC2A1", "MARCKS", "PLAU", "AHNAK2", "PERP", "S100A4", "KRT19", "COL6A1", "UACA", "COL17A1", "CDA", "TPM2", "S100A16", "KRT8", "PRSS23", "DST", "LAMC2", "S100P", "PRSS3", "LAMA3", "DSP", "ITGA3", "MDK", "FAM83A", "ITGB4")), name = "anti_tumor_signature")
epi_tumor_data <- FetchData(epi_pca, c(colnames(epi_pca@meta.data), paste0("PC_", 1:10)))
progeny_scores_data <- as.data.frame(t(GetAssayData(epi_tumor, assay = "progeny", slot = "scale.data")))
progeny_scores_data$cell_id <- rownames(progeny_scores_data)
epi_tumor_data <- full_join(epi_tumor_data, progeny_scores_data, by = "cell_id")
##rownames:接受参数(向量),类矩阵的对象
##cut_number:切割函数
progeny_names <- rownames(epi_pca@assays$progeny)
epi_tumor_data$bin <- cut_number(epi_tumor_data$PC_1, n = 10, labels = c(1:10))
ggplot(epi_tumor_data, mapping = aes(x = bin))
geom_bar()
# patients along PC1
ggplot(epi_tumor_data, mapping = aes(x = bin, fill = factor(patient_id, levels = c("p032", "p018", "p019", "p024", "p031", "p030", "p033", "p023", "p027", "p034"))))
geom_bar()
theme(legend.title = element_blank())
scale_fill_manual(values = use_colors)
ggsave2("SuppFig3G.pdf", path = "../results", width = 30, height = 30, units = "cm")
# histological subtypes along PC1
ggplot(epi_tumor_data, mapping = aes(x = bin, fill = factor(histo_subtype, levels = c("lepidic", "acinar", "mucinuous (papillary)", "(micro)papillary", "solid", "sarcomatoid"))))
geom_bar()
theme(legend.title = element_blank())
scale_fill_manual(values = use_colors)
ggsave2("Fig2F.pdf", path = "../results", width = 30, height = 30, units = "cm")
# cell cycle phase along PC1
ggplot(epi_tumor_data, mapping = aes(x = bin, fill = Phase))
geom_bar()
scale_fill_manual(values = use_colors)
ggsave2("SuppFig3H.pdf", path = "../results", width = 30, height = 30, units = "cm")
# normal cell type signatures along PC1
##猖狂数据互换函数pivot_longer
epi_tumor_data %>%
select(c(bin, VB_Basal.11, VB_Basal.22, VB_Ciliated.13, VB_Ciliated.24, VB_Club5, VB_Goblet.26, VB_Goblet.17, VB_Ionocytes8, VB_Type.2.alveolar9, VB_Type.1.alveolar10)) %>%
pivot_longer(cols = c(VB_Basal.11, VB_Basal.22, VB_Ciliated.13, VB_Ciliated.24, VB_Club5, VB_Goblet.26, VB_Goblet.17, VB_Ionocytes8, VB_Type.2.alveolar9, VB_Type.1.alveolar10), names_to = "cell_type") %>%
group_by(bin, cell_type) %>%
summarise(mean = mean(value), sd = sd(value)) %>%
ggplot(aes(x = as.numeric(bin), y = mean, color = cell_type))
geom_line(size = 1)
scale_color_brewer(type = "qual", palette = "Paired")
scale_x_continuous(breaks = c(1:20))
ggsave2("Fig2G.pdf", path = "../results", width = 30, height = 30, units = "cm")
# progeny pathway scores along PC1
getcolors <- colorRampPalette(brewer.pal(14, "Dark2"))
epi_tumor_data %>%
select(bin, progeny_names) %>%
pivot_longer(cols = progeny_names, names_to = "progeny") %>%
group_by(bin, progeny) %>%
summarise(mean = mean(value), sd = sd(value)) %>%
ggplot(aes(x = as.numeric(bin), y = mean, color = progeny))
geom_line(size = 1)
scale_color_manual(values = getcolors(14))
scale_x_continuous(breaks = c(1:20))
ggsave2("Fig2H.pdf", path = "../results", width = 30, height = 30, units = "cm")
# tumor cell signatures along PC1
epi_tumor_data %>%
select(bin, tumor_signature1, anti_tumor_signature1) %>%
pivot_longer(cols = c(tumor_signature1, anti_tumor_signature1), names_to = "genes") %>%
group_by(bin, genes) %>%
summarise(mean = mean(value), sd = sd(value)) %>%
ggplot(aes(x = as.numeric(bin), y = mean, color = genes))
geom_line(size = 1)
geom_errorbar(aes(ymin=mean-sd, ymax=mean sd), width = 0.3)
scale_x_continuous(breaks = c(1:20))
ggsave2("SuppFig3I.pdf", path = "../results", width = 30, height = 30, units = "cm")