绘图代码|多组学数据可视化的高端玩法

2020-08-29 17:50:21 浏览数 (2)

今天给大家介绍一款发表在Nature Methods(IF=28.467)上的R包--trackViewer,可以对基因结构,SNP/突变结果,甲基化结果,组蛋白结果等进行高颜值的可视化,让你那些无处安放的多组学数据完美展示出来。

那么如何来实现呢,小编带着大家一步步来。

第一步:安装R包并导入

代码语言:javascript复制
if (!requireNamespace("BiocManager", quietly = TRUE))install.packages("BiocManager")BiocManager::install("trackViewer")library(trackViewer)

第二步:绘制第一层gene结构

为了让大家更清楚每一步细节,小编在这里从构建基因结构的数据开始讲起。需要事先安装并加载一些相关的R包:

代码语言:javascript复制
library(TxDb.Hsapiens.UCSC.hg19.knownGene)library(org.Hs.eg.db)gr <- GRanges("chr11", IRanges(122929275, 122930122), strand="-")trs <- geneModelFromTxdb(TxDb.Hsapiens.UCSC.hg19.knownGene, org.Hs.eg.db,gr=gr)

假设我们构造了两个相关基因结构的数据:gene1和gene2

对于gene1,需要准备好两个文件: gene1.bed(Figure 2)gene1info.txt(Figure 3)

Figure 2:gene1.bed文件格式

Figure 3:gene1info.txt文件格式

gene2同理,准备好两个文件: gene2.bed(Figure 4)gene2info.txt(Figure 5)

Figure 4:gene2.bed文件格式

Figure 5:geneinfo2.txt文件格式

读取两个基因的相关文件,产生gene结构信息:

代码语言:javascript复制
chrname <- "chr21"filedir <- "D://trackViewer"#################设置需要显示的基因组范围gr <- GRanges(chrname,IRanges(100,1100))#################设置genestructure1的结构geneinfo <- read.table("D://trackViewer/gene1info.txt",header=TRUE,sep="t",stringsAsFactors=FALSE)genebed <- importScore(file.path(filedir, "gene1.bed"), format="BED",ranges=gr)#################gene结构文件中不需要score信息,因此将其设为NULLgenebed$dat$score <- NULLgenebed$name <- "gene00"genebed$type <- "transcript"genebed$format <- character(0)strand(genebed$dat) <- geneinfo$strandgenebed$dat$feature <- geneinfo$featuregenebed$dat$id <- geneinfo$idgenebed$dat$exon <- geneinfo$exongenebed$dat$transcript <- geneinfo$transcriptgenebed$dat$gene <- geneinfo$genegenebed$dat$symbol <- geneinfo$symbolgenebed$dat$density <- geneinfo$densitygenestructure1 <- genebed#################设置genestructure2的结构geneinfo2 <- read.table("D://trackViewer/gene2info.txt",header=TRUE,sep="t",stringsAsFactors=FALSE)genebed2 <- importScore(file.path(filedir, "gene2.bed"), format="BED",ranges=gr)genebed2$dat$score <- NULLgenebed2$name <- "gene00"genebed2$type <- "transcript"genebed2$format <- character(0)strand(genebed2$dat) <- geneinfo$strandgenebed2$dat$feature <- geneinfo$featuregenebed2$dat$id <- geneinfo$idgenebed2$dat$exon <- geneinfo$exongenebed2$dat$transcript <- geneinfo$transcriptgenebed2$dat$gene <- geneinfo$genegenebed2$dat$symbol <- geneinfo$symbolgenebed2$dat$density <- geneinfo$densitygenestructure2 <- genebed2#################optSty <- optimizeStyle(trackList(genestructure1,genestructure2))trackList <- optSty$tracksviewerStyle <- optSty$style##################设置图片位置setTrackViewerStyleParam(viewerStyle, "margin", c(.01, .05, .01, .05))#设置颜色及高度setTrackStyleParam(trackList[[1]], "color", c("#3CB371"))setTrackStyleParam(trackList[[1]], "height",0.05)setTrackStyleParam(trackList[[2]], "color", c("#D2691E"))setTrackStyleParam(trackList[[2]], "height",0.05)#设置名称names(trackList) <- c("gene1","gene2")#设置Y轴名称字体大小setTrackStyleParam(trackList[[1]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[2]], "ylabgp", list(cex=.6))#绘制第一层基因结构viewTracks(trackList,gr=gr,autoOptimizeStyle=TRUE,newpage=TRUE,viewerStyle=viewerStyle)

这样最底层的基因结构就构建完毕:

第三步:绘制第二层Coverage信息

这里你需要准备Coverage的信息文件,如示例文件coverExample.bed格式:

读取Coverage信息,并绘制图片:

代码语言:javascript复制
fox2 <- importScore(file.path(filedir, "coverExample.bed"), format="BED",ranges=gr)#################optSty <- optimizeStyle(trackList(genestructure1,genestructure2,fox2))trackList <- optSty$tracksviewerStyle <- optSty$style##################设置图片位置setTrackViewerStyleParam(viewerStyle, "margin", c(.01, .05, .01, .05))#设置颜色及高度setTrackStyleParam(trackList[[1]], "color", c("#3CB371"))setTrackStyleParam(trackList[[1]], "height",0.05)setTrackStyleParam(trackList[[2]], "color", c("#D2691E"))setTrackStyleParam(trackList[[2]], "height",0.05)setTrackStyleParam(trackList[[3]], "color", c("#4682B4"))setTrackStyleParam(trackList[[3]], "height",0.1)#设置名称names(trackList) <- c("gene1","gene2","Coverage")#设置Y轴名称字体大小setTrackStyleParam(trackList[[1]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[2]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[3]], "ylabgp",list(cex=.6))#绘制第二层Coverage信息viewTracks(trackList,gr=gr,autoOptimizeStyle=TRUE,newpage=TRUE,viewerStyle=viewerStyle)

这一步结束后,就可以在基因结构的基础上产生了reads的堆叠图:

第四步:绘制第三层SNP信息

需要准备SNP的文件,格式如示例文件:snpinfo.txt

读取SNP信息,并绘制图片:

代码语言:javascript复制
chrname = "chr21"snpinfo = read.table("D://trackViewer/snpinfo.txt",header=TRUE,sep="t")snppos = snpinfo$rspossnpname = snpinfo$rsnamesnpscore = snpinfo$rsscore#SNP得分设置为0~10,此时显示为1分为一个圆圈snpwidth <- c(1)snprange <- IRanges(start=snppos, width=snpwidth, names=snpname)sample.gr <- GRanges(seqnames=chrname, snprange)#设置snp圆圈内的颜色sample.gr$color <- sample(c("#CC9999", "#666699"), length(snpname), replace=TRUE)#设置snp圆圈上的颜色sample.gr$border <- sample(c("#999999"), length(snpname), replace=TRUE)sample.gr$score <- snpscoregr = GRanges(chrname, IRanges(100,1100))#设置snp圆圈大小sample.gr$cex = 0.8sample.gr$label.parameter.rot <- 45lollipopData <- new("track", dat=sample.gr, type="lollipopData")#################optSty <- optimizeStyle(trackList(genestructure1,genestructure2,fox2,lollipopData))trackList <- optSty$tracksviewerStyle <- optSty$style##################设置图片位置setTrackViewerStyleParam(viewerStyle, "margin", c(.01, .05, .01, .05))#设置颜色及高度setTrackStyleParam(trackList[[1]], "color", c("#3CB371"))setTrackStyleParam(trackList[[1]], "height",0.05)setTrackStyleParam(trackList[[2]], "color", c("#D2691E"))setTrackStyleParam(trackList[[2]], "height",0.05)setTrackStyleParam(trackList[[3]], "color", c("#4682B4"))setTrackStyleParam(trackList[[3]], "height",0.1)#设置名称names(trackList) <- c("gene1","gene2","Coverage","SNPinfo1")#设置Y轴名称字体大小setTrackStyleParam(trackList[[1]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[2]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[3]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[4]], "ylabgp", list(cex=.6))#绘制第三层SNP信息viewTracks(trackList,gr=gr,autoOptimizeStyle=TRUE,newpage=TRUE,viewerStyle=viewerStyle)

这一步结束后,就会在最上层加入了SNP的可视化结果:

第五步:绘制第四层SNP信息,以棒棒图的形式显示:

代码语言:javascript复制
sample.gr2 <- sample.grsample.gr2$score <- snpscore*2#当分数大于10时,得分多少转变为线段的长短而不是一个个的圆圈lollipopData2 <- new("track", dat=sample.gr2, type="lollipopData")#################optSty <- optimizeStyle(trackList(genestructure1,genestructure2,fox2,lollipopData,lollipopData2))trackList <- optSty$tracksviewerStyle <- optSty$style##################设置图片位置setTrackViewerStyleParam(viewerStyle, "margin", c(.01, .05, .01, .05))#设置颜色及高度setTrackStyleParam(trackList[[1]], "color", c("#3CB371"))setTrackStyleParam(trackList[[1]], "height",0.07)setTrackStyleParam(trackList[[2]], "color", c("#D2691E"))setTrackStyleParam(trackList[[2]], "height",0.07)setTrackStyleParam(trackList[[3]], "color", c("#4682B4"))setTrackStyleParam(trackList[[3]], "height",0.12)setTrackStyleParam(trackList[[4]], "height",0.35)setTrackStyleParam(trackList[[5]], "height",0.35)#设置名称names(trackList) <- c("gene1","gene2","Coverage","SNPinfo1","SNPinfo2")#设置Y轴名称字体大小setTrackStyleParam(trackList[[1]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[2]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[3]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[4]], "ylabgp", list(cex=.6))setTrackStyleParam(trackList[[5]], "ylabgp", list(cex=.6))viewTracks(trackList,gr=gr,autoOptimizeStyle=TRUE,newpage=TRUE,viewerStyle=viewerStyle)#添加参考线vp <- viewTracks(trackList,gr=gr,autoOptimizeStyle=TRUE,newpage=TRUE,viewerStyle=viewerStyle)addGuideLine(c(320,510), vp=vp,col="blue")

这一步结束后就完成了示例文件的图形啦!

再多唠叨几句:

我们还可以通过以下命令修改元素:

代码语言:javascript复制
browseTracks(trackList, gr=gr)

trackViewer的功能远不止如此,还支持SNP相关的其他类型的图片,随手抛几张图给大家养养眼,快一起学起来吧!

0 人点赞