67-R可视化11-用ggrepel更加美观的添加标记(火山图的实现)

2021-12-17 10:58:21 浏览数 (1)

参考:

  • Examples • ggrepel (slowkow.com)[1]

前言

上一讲我们提到了66-R可视化10-自由的在ggplot上添加文本(柱状图加计数)[2]

可是,有的时候,并不是所有的text 文本,都可以非常理想的实现我们希望达到的效果。

这时候颜值高一些的ggrepel 就登场了:

代码语言:javascript复制
library(ggrepel)
set.seed(42)

dat <- subset(mtcars, wt > 2.75 & wt < 3.45)
dat$car <- rownames(dat)

p <- ggplot(dat, aes(wt, mpg, label = car))  
  geom_point(color = "red")

p1 <- p   geom_text()   labs(title = "geom_text()")

p2 <- p   geom_text_repel()   labs(title = "geom_text_repel()")

gridExtra::grid.arrange(p1, p2, ncol = 2)

包的功能如其名字,repel,就是让标记被原本的点推开~

美化作图:以火山图为例

如下效果:

老规矩先加载包:

代码语言:javascript复制
my_packages<- c("maftools", "data.table",
                "RColorBrewer", "paletteer", "ggplot2",
                "ggpubr", "tidyverse", "REdaS", "ggrepel")
tmp <- sapply(my_packages, function(x) library(x, character.only = T)); rm(tmp, my_packages)

接着就是完整代码了:

代码语言:javascript复制
# 2. ggrepel ----
set.seed(1234)
DEG <- data.frame(
  genes = paste0("gene", 1:100),
  fold_change = round(runif(100, -10, 10), 2),
  P_value = runif(100, 0.001, 0.1)
)
DEG$group <-  ifelse((DEG$P_value > 0.05) | (abs(DEG$fold_change) < 2) , "no-Significant",
                     ifelse(DEG$fold_change > 2, "increase", "decrease"))
DEG$group <- factor(DEG$group, levels = c("decrease", "no-Significant", 
                                                      "increase"))
sig_DEG <- DEG[!DEG$group %in% "no-Significant",]
head_genes <- head(sig_DEG[order(abs(sig_DEG$fold_change), decreasing = T),]$genes, 15)
for_label <-  DEG[DEG$genes %in% head_genes,]

(p <- ggplot(data = DEG, 
             aes(x = fold_change, 
                 y = -log10(P_value)))  
   geom_point(size=3.5, alpha = 0.6, 
              aes(color = group))  
    theme_bw()   theme(
      axis.title = element_text(size = 14, face = "bold"), 
      axis.text = element_text(size = 14), 
      legend.title = element_text(size = 14),
      legend.text = element_text(size = 12))   
   ylab("-log10(Pvalue)")  
   scale_color_manual(values = c("blue", "grey", "red"))  
   geom_hline(yintercept = -log10(0.05),lty=4,col="black",lwd=0.8)   
    geom_vline(xintercept = c(-2, 2),lty=4,col="black",lwd=0.8)   
   geom_label_repel(
     data = for_label,
     aes(label = genes), 
     alpha = 0.7,
     max.overlaps = 15
   ) 
)

我们可以对比一下geom_label:

关键就是geom_text_repel 这个函数啦~

一些参数与操作

操作

隐藏某些labels

代码语言:javascript复制
dat2 <- subset(mtcars, wt > 3 & wt < 4)
# Hide all of the text labels.
dat2$car <- ""
# Let's just label these items.
ix_label <- c(2, 3, 14)
dat2$car[ix_label] <- rownames(dat2)[ix_label]

ggplot(dat2, aes(wt, mpg, label = car))  
  geom_text_repel()  
  geom_point(color = ifelse(dat2$car == "", "grey50", "red"))

对于大量散点图就可以非常实用的实现显示关键点了:

代码语言:javascript复制
set.seed(42)

dat3 <- rbind(
  data.frame(
    wt  = rnorm(n = 10000, mean = 3),
    mpg = rnorm(n = 10000, mean = 19),
    car = ""
  ),
  dat2[,c("wt", "mpg", "car")]
)

ggplot(dat3, aes(wt, mpg, label = car))  
  geom_point(data = dat3[dat3$car == "",], color = "grey50")  
  geom_text_repel(box.padding = 0.5, max.overlaps = Inf)  
  geom_point(data = dat3[dat3$car != "",], color = "red")

标记们,你们可别聚众打架啊

代码语言:javascript复制
set.seed(42)

n <- 15
dat4 <- data.frame(
  x = rep(1, length.out = n),
  y = rep(1, length.out = n),
  label = letters[1:n]
)

# Set it globally:
options(ggrepel.max.overlaps = Inf)

p1 <- ggplot(dat4, aes(x, y, label = label))  
  geom_point()  
  geom_label_repel(box.padding = 0.5, max.overlaps = 10)  
  labs(title = "max.overlaps = 10 (default)")

p2 <- ggplot(dat4, aes(x, y, label = label))  
  geom_point()  
  geom_label_repel(box.padding = 0.5)  
  labs(title = "max.overlaps = Inf")

gridExtra::grid.arrange(p1, p2, ncol = 2)

自行感受啦~

把标记往左往右推

代码语言:javascript复制
set.seed(42)
d <- data.frame(
  x1 = 1,
  y1 = rnorm(10),
  x2 = 2,
  y2 = rnorm(10),
  lab = state.name[1:10]
)

p <- ggplot(d, aes(x1, y1, xend = x2, yend = y2, label = lab, col = lab))  
  geom_segment(size = 1)  
  guides(color = "none")  
  theme(axis.title.x = element_blank())  
  geom_text_repel(
    nudge_x = -0.2, direction = "y", hjust = "right"
  )  
  geom_text_repel(
    aes(x2, y2), nudge_x = 0.1, direction = "y", hjust = "left"
  )

p   scale_x_continuous(
  breaks = 1:2, labels = c("Dimension 1", "Dimension 2"),
  expand = expansion(mult = 0.5)
)

主要和nudge_* 参数有关。

永远有线

代码语言:javascript复制
p <- ggplot(dat, aes(wt, mpg, label = car))  
  geom_point(color = "red")

p1 <- p  
  geom_text_repel(min.segment.length = 0, seed = 42, box.padding = 0.5)  
  labs(title = "min.segment.length = 0")

p2 <- p  
  geom_text_repel(min.segment.length = Inf, seed = 42, box.padding = 0.5)  
  labs(title = "min.segment.length = Inf")

gridExtra::grid.arrange(p1, p2, ncol = 2)

我们还可以给线段拐个歪~

代码语言:javascript复制
set.seed(42)
ggplot(dat, aes(wt, mpg, label = car))  
  geom_point(color = "red")  
  geom_text_repel(
    nudge_x = .15,
    box.padding = 0.5,
    nudge_y = 1,
    segment.curvature = -0.1,
    segment.ncp = 3,
    segment.angle = 20
  )

永远靠边站

hjust 参数,0 是靠右,0.5 居中,1 是靠左:

代码语言:javascript复制
set.seed(42)

p <- ggplot(mtcars, aes(y = wt, x = 1, label = rownames(mtcars)))  
  geom_point(color = "red")  
  ylim(1, 5.5)  
  theme(
    axis.line.x  = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x  = element_blank(),
    axis.title.x = element_blank()
  )

p1 <- p  
  xlim(1, 1.375)  
  geom_text_repel(
    force        = 0.5,
    nudge_x      = 0.15,
    direction    = "y",
    hjust        = 0,
    segment.size = 0.2
  )  
  ggtitle("hjust = 0")

p2 <- p  
  xlim(1, 1.375)  
  geom_text_repel(
    force        = 0.5,
    nudge_x      = 0.2,
    direction    = "y",
    hjust        = 0.5,
    segment.size = 0.2
  )  
  ggtitle("hjust = 0.5 (default)")

p3 <- p  
  xlim(0.25, 1)  
  scale_y_continuous(position = "right")  
  geom_text_repel(
    force        = 0.5,
    nudge_x      = -0.25,
    direction    = "y",
    hjust        = 1,
    segment.size = 0.2
  )  
  ggtitle("hjust = 1")

gridExtra::grid.arrange(p1, p2, p3, ncol = 3)

柱状图标记文本的新思路

我先前写过:66-R可视化10-自由的在ggplot上添加文本(柱状图加计数)[3]

这样有个小箭头,好像也还不错~

代码语言:javascript复制
p <- ggplot(mtcars, aes(factor(cyl), mpg))  
  stat_summary(
    fill = "gray90",
    colour = "black",
    fun = "mean",
    geom = "col"
  )

p1 <- p   stat_summary(
    aes(label = round(stat(y))),
    fun = "mean",
    geom = "text_repel",
    min.segment.length = 0, # always draw segments
    position = position_nudge(y = -2)
  )  
  labs(title = "position_nudge()")

p2 <- p   stat_summary(
    aes(label = round(stat(y))),
    fun = "mean",
    geom = "text_repel",
    min.segment.length = 0, # always draw segments
    position = position_nudge_repel(y = -2)
  )  
  labs(title = "position_nudge_repel()")

gridExtra::grid.arrange(p1, p2, ncol = 2)

参数

这里我都是节选的官方说明文档中的介绍:Examples • ggrepel (slowkow.com)[4]

如果需要了解更多实例,参见上面的文档。

这里复习一下参数:

代码语言:javascript复制
nudge_x:调整标签x轴位置
nudge_y:同上
bg.color = "grey30", # shadow color
bg.r = 0.15          # shadow radius
hjust :调整文本的位置
max.overlaps = Inf # 永远显示线段
segment.size = 0.1
segment.linetype = 6
segment.curvature = -0.1
# 改善一下你的线段的风格
direction # x轴放置还是y 轴放置

再来改善一下火山图

灵感和代码参加:RNAseqStat/enhance_volcano.R at master · xiayh17/RNAseqStat (github.com)[5]

代码语言:javascript复制
(p <- ggplot(data = DEG, 
             aes(x = fold_change, 
                 y = -log10(P_value)))  
   geom_point(size=3.5, alpha = 0.6, 
              aes(color = group))   
   geom_point(size = 3, shape = 1, data = for_label)   
   theme_bw()   theme(
     axis.title = element_text(size = 14, face = "bold"), 
     axis.text = element_text(size = 14), 
     legend.title = element_text(size = 14),
     legend.text = element_text(size = 12))   
   scale_x_continuous(expand = c(0.15, 0.15))   
   ylab("-log10(Pvalue)")  
   scale_color_manual(values = c("blue", "grey", "red"))  
   geom_hline(yintercept = -log10(0.05),lty=4,col="black",lwd=0.8)   
   geom_vline(xintercept = c(-2, 2),lty=4,col="black",lwd=0.8)   
   geom_text_repel(
     size = 4,
     data = for_label[which(for_label$fold_change < 0),],
     aes(label = genes),
     nudge_y      = -0.2,
     direction    = "y",
     hjust        = 1,
     segment.size = 0.1,
     segment.linetype = 6,
     segment.curvature = -0.1,
     max.overlaps = 10,
     max.iter = 1000000,
     max.time = 10,
     nudge_x =  for_label[which(for_label$fold_change < 0),]$fold_change   1.1*min(for_label$fold_change),
     min.segment.length = 0,
     bg.color = "#e0e0e0", # shadow color
     bg.r = 0.15          # shadow radius
   )   
   geom_text_repel(
     size = 4,
     data = for_label[which(for_label$fold_change > 0),],
     aes(label = genes),
     nudge_y      = -0.2, # move
     direction    = "y",
     hjust        = 0,
     segment.size = 0.1,
     segment.linetype = 6,
     segment.curvature = -0.1,
     max.overlaps = 10, # never overlaps
     max.iter = 1000000,
     max.time = 10, # DO NOT show too much overlaps
     nudge_x =  for_label[which(for_label$fold_change > 0),]$fold_change   1.1*max(for_label$fold_change),
     # right move right
     min.segment.length = 0, # always lines
     bg.color = "#e0e0e0", # shadow color
     bg.r = 0.15          # shadow radius
   )
)

你喜欢吗?

参考资料

[1]Examples • ggrepel (slowkow.com): https://ggrepel.slowkow.com/articles/examples.html

[2]66-R可视化10-自由的在ggplot上添加文本(柱状图加计数): 66-R可视化10-自由的在ggplot上添加文本(柱状图加计数).md

[3]66-R可视化10-自由的在ggplot上添加文本(柱状图加计数): 66-R可视化10-自由的在ggplot上添加文本(柱状图加计数).md

[4]Examples • ggrepel (slowkow.com): https://ggrepel.slowkow.com/articles/examples.html#examples-1

[5]RNAseqStat/enhance_volcano.R at master · xiayh17/RNAseqStat (github.com): https://github.com/xiayh17/RNAseqStat/blob/master/R/enhance_volcano.R

0 人点赞