导语
GUIDE ╲
MutationalPatterns适用于单核苷酸变体 (SNV)、插入和缺失 (Indel)、双碱基替换 (DBS) 和更大的多碱基替换 (MBS)。
背景介绍
突变过程在基因组 DNA 中留下特征足迹。MutationalPatterns提供了一套全面的灵活函数,使研究人员能够轻松评估和可视化碱基替换目录中的多种突变模式,例如健康样本、肿瘤样本或 DNA 修复缺陷细胞。
MutationalPatterns已经更新到第二个版本,添加了许多新功能,并增强了先前版本的功能。涵盖了广泛的模式,包括:突变特征、转录和复制链偏差、病变分离、基因组分布以及与基因组特征的关联,这些对于研究突变过程的活动具有共同意义。
R包安装
# Install BiocManager from CRAN only when it is not already available,
# then use it to install MutationalPatterns and attach the package.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("MutationalPatterns")
library(MutationalPatterns)
分析流程
01
数据准备
参考基因组
# Show the first few genome packages that BSgenome reports as available.
library(BSgenome)
head(available.genomes())

# Keep the genome package name in a variable because later calls take it
# as an argument; character.only = TRUE makes library() use the string
# stored in ref_genome instead of the literal symbol name.
ref_genome <- "BSgenome.Hsapiens.UCSC.hg19"
library(ref_genome, character.only = TRUE)
加载 SNVs示例数据
使用R包自带的两个示例数据集
# Find the example VCF files in the package's extdata directory whose
# names match "sample.vcf".
vcf_files <- list.files(
  system.file("extdata", package = "MutationalPatterns"),
  pattern = "sample.vcf", full.names = TRUE
)

# Names passed alongside the VCF files; assumes list.files() returns the
# files in this same order -- TODO confirm against the shipped file names.
sample_names <- c(
  "colon1", "colon2", "colon3",
  "intestine1", "intestine2", "intestine3",
  "liver1", "liver2", "liver3"
)

# Read the VCFs into `grl`, which later snippets index per sample.
grl <- read_vcfs_as_granges(vcf_files, sample_names, ref_genome)

# Tissue label for each sample, in the same order as sample_names.
tissue <- c(rep("colon", 3), rep("intestine", 3), rep("liver", 3))
加载indels、DBSs 和 MBSs示例数据
# Find the blood example VCFs in the package's extdata directory.
blood_vcf_fnames <- list.files(
  system.file("extdata", package = "MutationalPatterns"),
  pattern = "blood.*vcf", full.names = TRUE
)
blood_sample_names <- c("blood1", "blood2", "blood3")

# Option 1: read all mutation types in one pass (type = "all") ...
blood_grl <- read_vcfs_as_granges(
  blood_vcf_fnames, blood_sample_names, ref_genome,
  type = "all"
)

# ... and then select each mutation type from the combined object.
snv_grl <- get_mut_type(blood_grl, type = "snv")
indel_grl <- get_mut_type(blood_grl, type = "indel")
dbs_grl <- get_mut_type(blood_grl, type = "dbs")
mbs_grl <- get_mut_type(blood_grl, type = "mbs")

# Option 2: request a single type while reading.
# NOTE: this overwrites the indel_grl created a few lines above.
indel_grl <- read_vcfs_as_granges(
  blood_vcf_fnames, blood_sample_names, ref_genome,
  type = "indel"
)

# NOTE(review): predefined_dbs_mbs = TRUE presumably means the VCFs
# already encode DBS/MBS as single records -- confirm in the package docs.
predefined_dbs_grl <- read_vcfs_as_granges(
  blood_vcf_fnames, blood_sample_names, ref_genome,
  type = "dbs",
  predefined_dbs_mbs = TRUE
)
02
突变特征
SNVs
# Extract the mutations of the first sample and show the first 12.
muts <- mutations_from_vcf(grl[[1]])
head(muts, 12)

# Mutation types and sequence context for the first sample.
# (The original call to mut_type() was missing its closing parenthesis.)
types <- mut_type(grl[[1]])
context <- mut_context(grl[[1]], ref_genome)

# The result is stored under the same name as the function; R still
# resolves the call because it looks for a function when a name is called.
type_context <- type_context(grl[[1]], ref_genome)
lapply(type_context, head, 12)

# Mutation type occurrences computed over all samples in grl.
type_occurrences <- mut_type_occurrences(grl, ref_genome)
type_occurrences
突变谱
突变谱显示了每个突变类型在碱基替换目录中的相对贡献。plot_spectrum 函数绘制 6 种碱基替换类型中每一种对所有样本的平均相对贡献。误差线表示所有样本的 95% 置信区间。
# Three variants of the mutation spectrum over all samples:
# default, with CT = TRUE, and with individual points but no legend.
p1 <- plot_spectrum(type_occurrences)
p2 <- plot_spectrum(type_occurrences, CT = TRUE)
p3 <- plot_spectrum(
  type_occurrences,
  CT = TRUE, indv_points = TRUE, legend = FALSE
)

# Place the three plots side by side; the third column is narrower.
library("gridExtra")
grid.arrange(p1, p2, p3, ncol = 3, widths = c(3, 3, 1.75))

# Spectrum faceted by tissue, and a version whose error bars are set to
# "stdev" instead of the default.
p4 <- plot_spectrum(type_occurrences, by = tissue, CT = TRUE, legend = TRUE)
p5 <- plot_spectrum(
  type_occurrences,
  CT = TRUE, legend = TRUE, error_bars = "stdev"
)
grid.arrange(p4, p5, ncol = 2, widths = c(4, 2.3))
更大的contexts
也可以查看更大的contexts,这仅在您有大量突变时才有用
# Build the mutation matrix with a wider context (extension = 2).
mut_mat_ext_context <- mut_matrix(grl, ref_genome, extension = 2)

# Heatmap of the extended profiles, grouped by tissue.
plot_profile_heatmap(mut_mat_ext_context, by = tissue)

# River plot for columns 1 and 4 of the matrix only.
plot_river(mut_mat_ext_context[, c(1, 4)])
Indels
# Annotate the indels with their context, count the contexts, then plot
# both the condensed per-context view and the main-context summary.
indel_grl <- get_indel_context(indel_grl, ref_genome)
indel_counts <- count_indel_contexts(indel_grl)
plot_indel_contexts(indel_counts, condensed = TRUE)
plot_main_indel_contexts(indel_counts)
DBSs
# Annotate the double base substitutions with their context, count the
# contexts, then plot the full and the main-context views with
# same_y = TRUE.
dbs_grl <- get_dbs_context(dbs_grl)
dbs_counts <- count_dbs_contexts(dbs_grl)
plot_dbs_contexts(dbs_counts, same_y = TRUE)
plot_main_dbs_contexts(dbs_counts, same_y = TRUE)
03
Mutational signature
使用 NMF 从头提取突变特征
# Build the mutation count matrix for all samples.
mut_mat <- mut_matrix(vcf_list = grl, ref_genome = ref_genome)
head(mut_mat)

# Add a small pseudocount to every cell before running NMF
# (the `+` operator was missing in the original text).
mut_mat <- mut_mat + 0.0001

# Run NMF for ranks 2 to 5 and plot the estimate to help choose a rank.
library("NMF")
estimate <- nmf(
  mut_mat,
  rank = 2:5, method = "brunet",
  nrun = 10, seed = 123456, .opt = "v-p"
)
plot(estimate)

# Extract two signatures from the SNV matrix.
nmf_res <- extract_signatures(
  mut_mat,
  rank = 2, nrun = 10, single_core = TRUE
)

# Stack the indel and DBS count matrices by row and extract signatures
# from the combined matrix.
combi_mat <- rbind(indel_counts, dbs_counts)
nmf_res_combi <- extract_signatures(
  combi_mat,
  rank = 2, nrun = 10, single_core = TRUE
)
可视化NMF结果
# Give the two extracted signatures placeholder names.
colnames(nmf_res$signatures) <- c("Signature A", "Signature B")
rownames(nmf_res$contribution) <- c("Signature A", "Signature B")

# Rename the extracted signatures using the known signature set.
# NOTE(review): cutoff = 0.85 is presumably a similarity threshold --
# confirm in the rename_nmf_signatures() documentation.
signatures <- get_known_signatures()
nmf_res <- rename_nmf_signatures(nmf_res, signatures, cutoff = 0.85)
colnames(nmf_res$signatures)

# 96-channel profile of the extracted signatures.
plot_96_profile(nmf_res$signatures, condensed = TRUE)

# Relative contribution of each signature per sample. The element is
# spelled out in full (`signatures`) rather than relying on `$` partial
# matching of `signature`.
plot_contribution(
  nmf_res$contribution, nmf_res$signatures,
  mode = "relative"
)

# Contribution heatmap with clustering on both samples and signatures.
plot_contribution_heatmap(
  nmf_res$contribution,
  cluster_samples = TRUE,
  cluster_sigs = TRUE
)

# Compare the original matrix with the NMF reconstruction; a reference
# line is requested at 0.95.
plot_original_vs_reconstructed(
  mut_mat, nmf_res$reconstructed,
  y_intercept = 0.95
)
Signature refitting
# Fit the mutation matrix to the known signatures and plot the absolute
# contribution of each signature per sample.
fit_res <- fit_to_signatures(mut_mat, signatures)
plot_contribution(
  fit_res$contribution,
  coord_flip = FALSE,
  mode = "absolute"
)
Bootstrapped refitting
signature 重新拟合的稳定性可能并不理想,可以使用 Bootstrapping 来验证重新拟合(refitting)的稳定性,使结果更具置信度。
# Bootstrapped refit on columns 3 and 7 of the matrix only, with
# 50 bootstrap iterations and method = "strict".
contri_boots <- fit_to_signatures_bootstrapped(
  mut_mat[, c(3, 7)],
  signatures,
  n_boots = 50,
  method = "strict"
)

# Default plot of the bootstrapped contributions.
plot_bootstrapped_contribution(contri_boots)

# Same data as a dot plot of relative contributions.
plot_bootstrapped_contribution(
  contri_boots,
  mode = "relative",
  plot_type = "dotplot"
)

# Correlation plots from the bootstrap result; display the second element
# of the returned list.
fig_list <- plot_correlation_bootstrap(contri_boots)
fig_list[[2]]
突变谱和特征之间的相似性
# Cosine similarity between the first sample profile and the first
# known signature.
cos_sim(mut_mat[, 1], signatures[, 1])

# Pairwise similarity of every sample against every known signature;
# print the top-left 3 x 3 corner and draw a clustered heatmap.
cos_sim_samples_signatures <- cos_sim_matrix(mut_mat, signatures)
cos_sim_samples_signatures[1:3, 1:3]
plot_cosine_heatmap(
  cos_sim_samples_signatures,
  cluster_rows = TRUE, cluster_cols = TRUE
)

# Sample-versus-sample similarity, drawn the same way.
cos_sim_samples <- cos_sim_matrix(mut_mat, mut_mat)
plot_cosine_heatmap(
  cos_sim_samples,
  cluster_rows = TRUE, cluster_cols = TRUE
)
04
基因组分布
降雨图
降雨图显示了突变类型和突变间距离(intermutation distance),可用于可视化基因组或染色体子集的突变分布。
# Define autosomal chromosomes: the first 22 sequence names of the
# reference genome object (get() resolves the package name string held
# in ref_genome to the loaded object).
chromosomes <- seqnames(get(ref_genome))[1:22]

# Make a rainfall plot for the first sample, titled with its name.
# The y-axis limit is 1e+09 (the `+` was missing in the original text).
plot_rainfall(
  grl[[1]],
  title = names(grl[1]),
  chromosomes = chromosomes, cex = 1.5, ylim = 1e+09
)
基因组区域的突变模式
library(biomaRt)

# Load the three region annotations stored as .rds files in the
# package's "states" directory.
CTCF_g <- readRDS(system.file(
  "states/CTCF_g_data.rds",
  package = "MutationalPatterns"
))
promoter_g <- readRDS(system.file(
  "states/promoter_g_data.rds",
  package = "MutationalPatterns"
))
flanking_g <- readRDS(system.file(
  "states/promoter_flanking_g_data.rds",
  package = "MutationalPatterns"
))

# Combine the regions into one named list and switch the sequence names
# to UCSC style.
regions <- GRangesList(promoter_g, flanking_g, CTCF_g)
names(regions) <- c("Promoter", "Promoter flanking", "CTCF")
seqlevelsStyle(regions) <- "UCSC"

# Split the mutations of every sample by region and inspect the names
# of the resulting groups.
grl_region <- split_muts_region(grl, regions)
names(grl_region)

# Build the mutation matrix per sample/region group, extract two
# signatures and rename them using the known signature set.
mut_mat_region <- mut_matrix(grl_region, ref_genome)
nmf_res_region <- extract_signatures(
  mut_mat_region,
  rank = 2, nrun = 10, single_core = TRUE
)
nmf_res_region <- rename_nmf_signatures(
  nmf_res_region,
  signatures,
  cutoff = 0.85
)

# Contribution heatmap with clustering on both samples and signatures.
plot_contribution_heatmap(
  nmf_res_region$contribution,
  cluster_samples = TRUE,
  cluster_sigs = TRUE
)
Mutation Spectrum
# Mutation type occurrences for the region-split mutations, plotted
# once with the default mode and once with mode = "relative_sample".
type_occurrences_region <- mut_type_occurrences(grl_region, ref_genome)
plot_spectrum_region(type_occurrences_region)

plot_spectrum_region(type_occurrences_region, mode = "relative_sample")
小编总结
MutationalPatterns的新版本在多种突变分析中均有强大的功能,并且能够进行高质量的绘图,小伙伴们可以先用示例数据集去尝试,然后再应用到我们自己的数据中哦!