MutationalPatterns--进行肿瘤突变分析!

2022-03-29 10:39:20 浏览数 (1)

导语

GUIDE ╲

MutationalPatterns适用于单核苷酸变体 (SNV)、插入和缺失 (Indel)、双碱基替换 (DBS) 和更大的多碱基替换 (MBS)。

背景介绍

突变过程会在基因组 DNA 中留下特征性的足迹。MutationalPatterns提供了一套全面而灵活的函数,使研究人员能够轻松评估和可视化碱基替换目录(例如来自健康样本、肿瘤样本或 DNA 修复缺陷细胞的目录)中的多种突变模式。

MutationalPatterns已经更新到第二个版本,添加了许多新功能,并增强了先前版本的功能。涵盖了广泛的模式,包括:突变特征、转录和复制链偏差、病变分离、基因组分布以及与基因组特征的关联,这些对于研究突变过程的活动具有共同意义。

R包安装

代码语言:javascript复制
# Install BiocManager from CRAN only when it is not available yet, then use
# it to install MutationalPatterns and attach the package.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

BiocManager::install("MutationalPatterns")
library(MutationalPatterns)

分析流程

01

数据准备

参考基因组

代码语言:javascript复制
# Attach BSgenome and show the first six available reference genomes.
library(BSgenome)
head(available.genomes(), n = 6L)
代码语言:javascript复制
# The reference genome is kept as a package *name* (a string) because later
# calls receive it in that form; character.only = TRUE lets library() attach
# the package named by the variable.
ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
library(package = ref_genome, character.only = TRUE)

加载 SNVs示例数据

使用R包自带的两个示例数据集

代码语言:javascript复制
# Example VCF files shipped in the package's extdata directory whose names
# match "sample.vcf".
vcf_files <- list.files(
  path = system.file("extdata", package = "MutationalPatterns"),
  pattern = "sample.vcf",
  full.names = TRUE
)

# Three samples for each of three tissues: colon1..3, intestine1..3,
# liver1..3 (tissue labels and sample names share the same order).
tissue <- rep(c("colon", "intestine", "liver"), each = 3)
sample_names <- paste0(tissue, 1:3)

# Read the VCFs using the sample names and reference genome defined above.
grl <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome)

加载indels、DBSs 和 MBSs示例数据

代码语言:javascript复制
# Example blood VCF files from the package's extdata directory.
blood_vcf_fnames <- list.files(
  path = system.file("extdata", package = "MutationalPatterns"),
  pattern = "blood.*vcf",
  full.names = TRUE
)
blood_sample_names <- paste0("blood", 1:3)

# Approach 1: read every mutation type at once, then pull out each type.
blood_grl <- read_vcfs_as_granges(
  blood_vcf_fnames, blood_sample_names, ref_genome,
  type = "all"
)
snv_grl <- get_mut_type(blood_grl, type = "snv")
indel_grl <- get_mut_type(blood_grl, type = "indel")
dbs_grl <- get_mut_type(blood_grl, type = "dbs")
mbs_grl <- get_mut_type(blood_grl, type = "mbs")

# Approach 2: request a single mutation type while reading.
# NOTE: this overwrites the indel_grl created by approach 1 above.
indel_grl <- read_vcfs_as_granges(
  blood_vcf_fnames, blood_sample_names, ref_genome,
  type = "indel"
)

# Read DBSs with predefined_dbs_mbs = TRUE.
predefined_dbs_grl <- read_vcfs_as_granges(
  blood_vcf_fnames, blood_sample_names, ref_genome,
  type = "dbs",
  predefined_dbs_mbs = TRUE
)

02

突变特征

SNVs

代码语言:javascript复制
# Base substitutions of the first sample; show the first 12.
muts <- mutations_from_vcf(grl[[1]])
head(muts, n = 12)
代码语言:javascript复制
# Mutation types of the first sample.
# (Fixed: the original line was missing its closing parenthesis, which made
# the whole snippet unparseable.)
types <- mut_type(grl[[1]])

# Sequence context of the mutations in the first sample.
context <- mut_context(grl[[1]], ref_genome)

# Type and context of the first sample together; the result is list-like, so
# lapply() is used to show the first 12 entries of every element.
# NOTE: the variable shares its name with the function type_context(); later
# calls still work because R skips non-function objects when resolving a call.
type_context <- type_context(grl[[1]], ref_genome)
lapply(type_context, head, 12)

# Mutation type occurrences for all samples in grl; printed for inspection.
type_occurrences <- mut_type_occurrences(grl, ref_genome)
type_occurrences

突变谱

突变谱显示了每个突变类型在碱基替换目录中的相对贡献。plot_spectrum 函数绘制 6 种碱基替换类型中每一种对所有样本的平均相对贡献。误差线表示所有样本的 95% 置信区间。

代码语言:javascript复制
# Three variants of the mutation spectrum over all samples:
#   p1 - default spectrum
#   p2 - CT = TRUE
#   p3 - CT = TRUE, individual sample points drawn, legend hidden
p1 <- plot_spectrum(type_occurrences)
p2 <- plot_spectrum(type_occurrences, CT = TRUE)
p3 <- plot_spectrum(
  type_occurrences,
  CT = TRUE,
  indv_points = TRUE,
  legend = FALSE
)

# Arrange the three plots side by side; the third column is narrower.
library(gridExtra)
grid.arrange(p1, p2, p3, ncol = 3, widths = c(3, 3, 1.75))
代码语言:javascript复制
# p4: spectrum grouped by the tissue labels.
# p5: spectrum over all samples with error_bars = "stdev".
p4 <- plot_spectrum(
  type_occurrences,
  by = tissue,
  CT = TRUE,
  legend = TRUE
)
p5 <- plot_spectrum(
  type_occurrences,
  CT = TRUE,
  legend = TRUE,
  error_bars = "stdev"
)
grid.arrange(p4, p5, ncol = 2, widths = c(4, 2.3))

更大的contexts

也可以查看更大的contexts,这仅在您有大量突变时才有用

代码语言:javascript复制
# Mutation matrix built with extension = 2, i.e. a wider context than the
# default.
mut_mat_ext_context <- mut_matrix(grl, ref_genome, extension = 2)

# Heatmap of the extended-context profile, grouped by tissue.
plot_profile_heatmap(mut_mat_ext_context, by = tissue)

# River plot restricted to columns 1 and 4 of the matrix.
plot_river(mut_mat_ext_context[, c(1, 4)])

Indels

代码语言:javascript复制
# Annotate the indels with their context (indel_grl is replaced by the
# annotated version), count the contexts, then draw both indel plots.
indel_grl <- get_indel_context(indel_grl, ref_genome)
indel_counts <- count_indel_contexts(indel_grl)
plot_indel_contexts(indel_counts, condensed = TRUE)
plot_main_indel_contexts(indel_counts)

DBSs

代码语言:javascript复制
# Annotate the DBSs with their context (dbs_grl is replaced by the annotated
# version), count the contexts, then draw both DBS plots with same_y = TRUE.
dbs_grl <- get_dbs_context(dbs_grl)
dbs_counts <- count_dbs_contexts(dbs_grl)
plot_dbs_contexts(dbs_counts, same_y = TRUE)
plot_main_dbs_contexts(dbs_counts, same_y = TRUE)

03

Mutational signature

使用 NMF 从头提取突变特征

代码语言:javascript复制
# Mutation count matrix for all samples in grl.
mut_mat <- mut_matrix(vcf_list = grl, ref_genome = ref_genome)
head(mut_mat)

# Add a small pseudocount to the matrix before running NMF.
# (Fixed: the `+` operator was lost in the original text, leaving the
# unparseable `mut_mat   0.0001`.)
mut_mat <- mut_mat + 0.0001

# Run NMF for ranks 2 to 5 and plot the result to help choose a rank.
library("NMF")
estimate <- nmf(mut_mat, rank = 2:5, method = "brunet",
                nrun = 10, seed = 123456, .opt = "v-p")
plot(estimate)
代码语言:javascript复制
# Extract two signatures from the mutation matrix.
nmf_res <- extract_signatures(mut_mat, rank = 2, nrun = 10, single_core = TRUE)

# Stack the indel and DBS count matrices row-wise and extract signatures
# from the combined matrix.
# (Changed `=` to `<-`, the idiomatic R assignment operator.)
combi_mat <- rbind(indel_counts, dbs_counts)
nmf_res_combi <- extract_signatures(combi_mat, rank = 2, nrun = 10, single_core = TRUE)

可视化NMF结果

代码语言:javascript复制
# Give the two extracted signatures placeholder names; the signatures matrix
# has signatures in columns, the contribution matrix has them in rows.
colnames(nmf_res$signatures) <- c("Signature A", "Signature B")
rownames(nmf_res$contribution) <- c("Signature A", "Signature B")

# Load the known signatures and rename the extracted ones against them
# with cutoff = 0.85.
# (Changed `=` to `<-`, the idiomatic R assignment operator.)
signatures <- get_known_signatures()
nmf_res <- rename_nmf_signatures(nmf_res, signatures, cutoff = 0.85)
colnames(nmf_res$signatures)

# 96-channel profile of the extracted signatures.
plot_96_profile(nmf_res$signatures, condensed = TRUE)
代码语言:javascript复制
# Relative contribution of each extracted signature.
# (Changed `nmf_res$signature` to `nmf_res$signatures`: the original only
# worked through `$` partial matching on the list element name.)
plot_contribution(nmf_res$contribution, nmf_res$signatures,
  mode = "relative"
)
代码语言:javascript复制
# Heatmap of signature contributions with both samples and signatures
# clustered.
plot_contribution_heatmap(
  nmf_res$contribution,
  cluster_samples = TRUE,
  cluster_sigs = TRUE
)
代码语言:javascript复制
# Compare the original mutation matrix with the NMF reconstruction;
# y_intercept = 0.95 is passed to the plot.
plot_original_vs_reconstructed(
  mut_mat,
  nmf_res$reconstructed,
  y_intercept = 0.95
)

Signature refitting

代码语言:javascript复制
# Refit the mutation matrix to the known signatures and plot the absolute
# contribution of each signature.
fit_res <- fit_to_signatures(mut_mat, signatures)
plot_contribution(
  fit_res$contribution,
  mode = "absolute",
  coord_flip = FALSE
)

Bootstrapped refitting

signature 重新拟合的稳定性可能并不理想,Bootstrapping 可用于验证重新拟合的稳定性,使结果更具置信度。

代码语言:javascript复制
# Bootstrapped refit restricted to columns 3 and 7 of the mutation matrix,
# with 50 bootstrap iterations and method = "strict".
contri_boots <- fit_to_signatures_bootstrapped(
  mut_mat[, c(3, 7)],
  signatures,
  n_boots = 50,
  method = "strict"
)
plot_bootstrapped_contribution(contri_boots)
代码语言:javascript复制
# Same bootstrap result shown as a dotplot of relative contributions.
plot_bootstrapped_contribution(
  contri_boots,
  mode = "relative",
  plot_type = "dotplot"
)
代码语言:javascript复制
# Build the bootstrap correlation figures and display the second element of
# the returned list.
fig_list <- plot_correlation_bootstrap(contri_boots)
fig_list[[2]]

突变谱和特征之间的相似性

代码语言:javascript复制
# Cosine similarity between the first sample profile and the first known
# signature.
cos_sim(mut_mat[, 1], signatures[, 1])

# Similarity matrix between the sample profiles and the known signatures;
# print its top-left 3 x 3 corner and plot it with rows and columns clustered.
cos_sim_samples_signatures <- cos_sim_matrix(mut_mat, signatures)
cos_sim_samples_signatures[1:3, 1:3]
plot_cosine_heatmap(
  cos_sim_samples_signatures,
  cluster_rows = TRUE,
  cluster_cols = TRUE
)
代码语言:javascript复制
# Cosine similarity of the mutation matrix against itself (sample versus
# sample), plotted with rows and columns clustered.
cos_sim_samples <- cos_sim_matrix(mut_mat, mut_mat)
plot_cosine_heatmap(cos_sim_samples, cluster_rows = TRUE, cluster_cols = TRUE)

04

基因组分布

降雨图

降雨图显示了突变类型和突变间距离(intermutation distance),可用于可视化整个基因组或部分染色体上的突变分布。

代码语言:javascript复制
# Define autosomal chromosomes: the first 22 sequence names of the
# reference genome object (looked up by its package name via get()).
chromosomes <- seqnames(get(ref_genome))[1:22]

# Make a rainfall plot of the first sample, titled with its sample name.
# (Fixed: the exponent sign was lost in the original text, leaving the
# unparseable `1e 09`; the intended value is 1e+09.)
plot_rainfall(grl[[1]],
  title = names(grl[1]),
  chromosomes = chromosomes, cex = 1.5, ylim = 1e+09
)

基因组区域的突变模式

代码语言:javascript复制
library(biomaRt)

# Load three sets of genomic regions stored as RDS files inside the package.
promoter_g <- readRDS(
  system.file("states/promoter_g_data.rds", package = "MutationalPatterns")
)
flanking_g <- readRDS(
  system.file("states/promoter_flanking_g_data.rds",
    package = "MutationalPatterns"
  )
)
CTCF_g <- readRDS(
  system.file("states/CTCF_g_data.rds", package = "MutationalPatterns")
)

# Combine the regions into one named list and switch the sequence names to
# the UCSC style.
regions <- GRangesList(promoter_g, flanking_g, CTCF_g)
names(regions) <- c("Promoter", "Promoter flanking", "CTCF")
seqlevelsStyle(regions) <- "UCSC"

# Split the mutations of every sample by region and show the resulting names.
grl_region <- split_muts_region(grl, regions)
names(grl_region)

# Build a mutation matrix from the split data, extract two signatures, and
# rename them against the known signatures with cutoff = 0.85.
mut_mat_region <- mut_matrix(grl_region, ref_genome)
nmf_res_region <- extract_signatures(
  mut_mat_region,
  rank = 2,
  nrun = 10,
  single_core = TRUE
)
nmf_res_region <- rename_nmf_signatures(
  nmf_res_region,
  signatures,
  cutoff = 0.85
)

# Heatmap of the contributions with samples and signatures clustered.
plot_contribution_heatmap(
  nmf_res_region$contribution,
  cluster_samples = TRUE,
  cluster_sigs = TRUE
)

Mutation Spectrum

代码语言:javascript复制
# Mutation type occurrences computed on the region-split mutations, then
# plotted as a per-region spectrum.
type_occurrences_region <- mut_type_occurrences(grl_region, ref_genome)
plot_spectrum_region(type_occurrences_region)
代码语言:javascript复制
# Same per-region spectrum, drawn with mode = "relative_sample".
plot_spectrum_region(type_occurrences_region, mode = "relative_sample")

小编总结

MutationalPatterns的新版本在多种突变分析中均有强大的功能,并且能够进行高质量的绘图,小伙伴们可以先用示例数据集去尝试,然后再应用到我们自己的数据中哦!

0 人点赞