突变signature分析你不能错过的R包!

2022-03-29 08:10:11 浏览数 (2)

今天给大家带来的是signature分析的R包“YAPSA”,让大家在分析signature的时候多一个选择,增加绘图展示的多样性,最重要的是让你的老板知道你有多优秀。

大家知道前文分享的“deconstructSigs”(点击看往期详情)也是突变signature的绘图软件,好奇的小伙伴就有疑问了,这俩要是一样就没必要再学了吧?不要偷懒,这两个软件目标是一样的,但是还是不一样的。第一,”与“deconstructSigs”的相比,“YAPSA”用的是Linear Combination Decomposition (LCD)线性组合分解的方法来预测signature,而“deconstructSigs”是利用非负矩阵分解(Nonnegative Matrix Factor)的方法预测;另外,“YAPSA”方法展示可通过阈值和颜色进行调整,较为灵活;再者,“YAPSA”运行更快和所需的资源更少(个人感觉,在突变不是很多的情况下,速度没显著差别)。这两款R包均可选择signature.nature2013 或 signature.COSMIC作为已知signature进行相关性计算。

下边全是干货,请认真阅读,广泛传播!

1、加载包

代码语言:javascript复制
library(YAPSA)library(knitr)
opts_chunk$set(echo=TRUE)
opts_chunk$set(fig.show='asis')
library(BSgenome.Hsapiens.UCSC.hg19)#注意基因组版本

2、准备数据

3、运行R代码

代码语言:javascript复制
#读取突变文件
data<-read.table(file="C:/Users/snp_mutation.txt",header=T,check.names=F)
#提取SNV序列长度
word_length <- 3
data1 <- 
 create_mutation_catalogue_from_df(
         data,
         this_seqnames.field = "CHROM", this_start.field = "POS",
         this_end.field = "POS", this_PID.field = "PID",
         this_subgroup.field = "SUBGROUP",
         this_refGenome = BSgenome.Hsapiens.UCSC.hg19,#注意基因组版本要与SNV calling 的基因组保持一致。
         this_wordLength = word_length)
names(data1)
data_df <- as.data.frame(data1$matrix)

#读入COSMIC_signature(也可选择另外一个cosmic_nature_2013)
Alex_COSMIC_signatures_path <- 
  paste0("http://cancer.sanger.ac.uk/cancergenome/",
         "assets/signatures_probabilities.txt")
AlexCosmicValid_sig_df <- read.csv(Alex_COSMIC_signatures_path,
                                      header=TRUE,sep="t")
Alex_COSMIC_rownames <- paste(AlexCosmicValid_sig_df[,1],
                              AlexCosmicValid_sig_df[,2],sep=" ")
COSMIC_select_ind <- grep("Signature",names(AlexCosmicValid_sig_df))
AlexCosmicValid_sig_df <- AlexCosmicValid_sig_df[,COSMIC_select_ind]
number_of_Alex_COSMIC_sigs <- dim(AlexCosmicValid_sig_df)[2]
names(AlexCosmicValid_sig_df) <- gsub("Signature\.","AC",
                                         names(AlexCosmicValid_sig_df))
rownames(AlexCosmicValid_sig_df) <- Alex_COSMIC_rownames

#对30个signature进行颜色设置
COSMIC_signature_colour_vector <- 
c("green","pink","goldenrod",
    "lightblue","blue","orangered","yellow",
    "orange","brown","purple","red",
    "darkblue","magenta","maroon",
    "yellowgreen","violet","lightgreen",
    "sienna4","deeppink","darkorchid",
    "seagreen","grey","darkgrey",
    "black","yellow4","coral2","chocolate2",
    "navyblue","plum","springgreen")
#30个signature的生物学意义
COSMIC_bio_process_vector <- 
c("spontaneous deamination","APOBEC",
  "defect DNA DSB repair hom. recomb.",
  "tobacco mutatgens, benzo(a)pyrene",
  "unknown",
  "defect DNA MMR, found in MSI tumors",
  "UV light exposure","unknown","POL eta and SHM",
  "altered POL E",
  "alkylating agents, temozolomide",
  "unknown","APOBEC","unknown",
  "defect DNA MMR","unknown","unknown",
  "unknown","unknown",
  "associated w. small indels at repeats",
  "unknown","aristocholic acid","unknown",
  "aflatoxin","unknown","defect DNA MMR",
   "unknown","unknown","tobacco chewing","unknown")
AlexCosmicValid_sigInd_df <- data.frame(sig=colnames(AlexCosmicValid_sig_df))
AlexCosmicValid_sigInd_df$index <- seq_len(dim(AlexCosmicValid_sigInd_df)[1])
AlexCosmicValid_sigInd_df$colour <- COSMIC_signature_colour_vector
AlexCosmicValid_sigInd_df$process <- COSMIC_bio_process_vector

current_sig_df <- AlexCosmicValid_sig_df
current_sigInd_df <- AlexCosmicValid_sigInd_df
COSMICExposures_df <-
  LCD(data_df,current_sig_df)

COSMIC_subgroups_df <- 
     make_subgroups_df(data,
     COSMICExposures_df)
#绘图
exposures_barplot(
 in_exposures_df = COSMICExposures_df,
 in_subgroups_df = COSMIC_subgroups_df)

绘图展示如下

代码语言:javascript复制
#换颜色绘图
 exposures_barplot(
  in_exposures_df = COSMICExposures_df,
  in_signatures_ind_df = current_sigInd_df,
  in_subgroups_df = COSMIC_subgroups_df)

绘图展示如下

代码语言:javascript复制
#设置阈值绘图,低于阈值的将过滤掉,不会展示在图上。
my_cutoff <- 0.06
general_cutoff_vector <- rep(my_cutoff,dim(current_sig_df)[2])
CosmicValid_cutoffGen_LCDlist <- LCD_complex_cutoff(
  in_mutation_catalogue_df = data_df,
  in_signatures_df = current_sig_df,
  in_cutoff_vector = general_cutoff_vector,
  in_sig_ind_df = current_sigInd_df)
  
 exposures_barplot(
    in_exposures_df = CosmicValid_cutoffGen_LCDlist$exposures,
    in_signatures_ind_df = CosmicValid_cutoffGen_LCDlist$out_sig_ind_df,
    in_subgroups_df = COSMIC_subgroups_df)

过滤阈值后结果如下

代码语言:javascript复制
#绘图标准化
exposures_barplot(
  in_exposures_df = CosmicValid_cutoffGen_LCDlist$norm_exposures,
  in_signatures_ind_df = CosmicValid_cutoffGen_LCDlist$out_sig_ind_df,
  in_subgroups_df = COSMIC_subgroups_df)

过滤阈值标准化结果如下

这个R包绘图到此就介绍完了,小伙伴们是不是很想试一试呢?那么小编再次在提醒一下注意事项:1、注意使用的signature选择;2注意基因组版本的使用。

0 人点赞