今天的推文给大家介绍一个我发现的比较优秀的可视化R包——ggdist包。这是一个非常优秀且方便的、用于绘制分布(distributions)和不确定性(uncertainty)的可视化绘图包,详细介绍大家可以去官网查阅:ggdist官网。本期推文涉及的内容主要如下:
- ggdist 主要绘图函数介绍
- ggdist 实例绘图
ggdist 主要绘图函数介绍
数据生成并绘制
对生成的数据集进行的最小二乘(OLS)线性回归分析
- 生成数据
# Simulate a small data set: `n` observations for each of `n_condition`
# conditions, with a different mean response per condition.
set.seed(5)  # fixed seed so the simulated responses are reproducible
n <- 10
n_condition <- 5
ABC <-
  tibble(
    # The five labels are repeated n times: A, B, C, D, E, A, B, ...
    condition = rep(c("A", "B", "C", "D", "E"), n),
    # The mean vector is recycled in the same A..E order as `condition`;
    # every draw uses a standard deviation of 0.5.
    response = rnorm(n * n_condition, c(0, 1, 2, 1, -1), 0.5)
  )
# Base plot: the raw responses of each condition drawn as points.
# ggplot2 layers have to be chained with `+`; without it each call is a
# separate top-level expression and is never added to the plot.
ABC %>%
  ggplot(aes(x = response, y = condition)) +
  geom_point(alpha = 0.5, size = 5) +
  ylab("condition") +
  labs(
    title = "ggdist Exercise",
    subtitle = "base plot",
    caption = "Visualization by DataCharm"
  ) +
  theme(
    # NOTE(review): assumes a font is registered under this family name.
    text = element_text(family = "Times_New_Roman", face = "bold")
  )
结果如下:
我们对数据进行简单的线性拟合:
# Fit a linear model of response on condition using the simulated data.
m_ABC <- lm(response ~ condition, data = ABC)
# Format the fitted model with tidy() for display.
tidy(m_ABC)
结果如下:
- 使用stat_dist_halfeye()方法绘制
# Half-eye plot of the model terms: one Student-t distribution per row of
# the tidy() output. Layers are chained with `+` so they are all added to
# the plot.
m_ABC %>%
  tidy() %>%
  ggplot(aes(y = term)) +
  stat_dist_halfeye(
    # Distribution arguments, in order: residual degrees of freedom of the
    # fit, then the `estimate` and `std.error` columns.
    aes(
      dist = "student_t",
      arg1 = df.residual(m_ABC),
      arg2 = estimate,
      arg3 = std.error
    )
  ) +
  ylab("condition") +
  labs(
    title = "ggdist Exercise",
    subtitle = "half-eye plots",
    caption = "Visualization by DataCharm"
  ) +
  theme(
    # NOTE(review): assumes a font is registered under this family name.
    text = element_text(family = "Times_New_Roman", face = "bold")
  )
结果如下:
- 使用stat_dist_halfeye()方法绘制(基于各条件的拟合值,并叠加原始数据点)
# Half-eye plot of the fitted value for each condition, with the raw
# observations drawn underneath as tick marks. Layers are chained with `+`
# so they are all added to the plot.
ABC %>%
  data_grid(condition) %>%
  # se_fit = TRUE requests the .se.fit column used below.
  augment(m_ABC, newdata = ., se_fit = TRUE) %>%
  ggplot(aes(y = condition)) +
  stat_dist_halfeye(
    # Distribution arguments, in order: residual degrees of freedom of the
    # fit, then the `.fitted` and `.se.fit` columns.
    aes(
      dist = "student_t",
      arg1 = df.residual(m_ABC),
      arg2 = .fitted,
      arg3 = .se.fit
    ),
    scale = .5
  ) +
  # Raw data comes from ABC, drawn as "|" marks nudged 0.15 below each row.
  geom_point(
    aes(x = response),
    data = ABC,
    pch = "|",
    size = 2,
    position = position_nudge(y = -.15)
  ) +
  ylab("condition") +
  labs(
    title = "ggdist Exercise",
    subtitle = "dist-halfeye plots",
    caption = "Visualization by DataCharm"
  ) +
  theme(
    # NOTE(review): assumes a font is registered under this family name.
    text = element_text(family = "Times_New_Roman", face = "bold")
  )
结果如下:
- 使用 stat_dist_gradientinterval()方法绘制
# Gradient-interval plot of the fitted value for each condition. Layers are
# chained with `+` so they are all added to the plot.
ABC %>%
  data_grid(condition) %>%
  # se_fit = TRUE requests the .se.fit column used below.
  augment(m_ABC, newdata = ., se_fit = TRUE) %>%
  ggplot(aes(y = condition)) +
  stat_dist_gradientinterval(
    # Distribution arguments, in order: residual degrees of freedom of the
    # fit, then the `.fitted` and `.se.fit` columns.
    aes(
      dist = "student_t",
      arg1 = df.residual(m_ABC),
      arg2 = .fitted,
      arg3 = .se.fit
    ),
    scale = .5
  ) +
  ylab("condition") +
  labs(
    title = "ggdist Exercise",
    subtitle = "gradientinterval plots",
    caption = "Visualization by DataCharm"
  ) +
  theme(
    # NOTE(review): assumes a font is registered under this family name.
    text = element_text(family = "Times_New_Roman", face = "bold")
  )
结果如下:
- 使用stat_dist_ccdfinterval()方法绘制
# CCDF-interval plot of the fitted value for each condition. Layers are
# chained with `+` so they are all added to the plot.
ABC %>%
  data_grid(condition) %>%
  # se_fit = TRUE requests the .se.fit column used below.
  augment(m_ABC, newdata = ., se_fit = TRUE) %>%
  ggplot(aes(y = condition)) +
  stat_dist_ccdfinterval(
    # Distribution arguments, in order: residual degrees of freedom of the
    # fit, then the `.fitted` and `.se.fit` columns.
    aes(
      dist = "student_t",
      arg1 = df.residual(m_ABC),
      arg2 = .fitted,
      arg3 = .se.fit
    )
  ) +
  ylab("condition") +
  labs(
    title = "ggdist Exercise",
    subtitle = "CCDF plots",
    caption = "Visualization by DataCharm"
  ) +
  theme(
    # NOTE(review): assumes a font is registered under this family name.
    text = element_text(family = "Times_New_Roman", face = "bold")
  )
结果如下:
以上就是结合ggdist官网的例子对其基本绘图函数进行简单的可视化效果展示,更多其他绘制方法,大家可以阅读官网的例子。
ggdist 实例绘图
实例展示部分涉及一些基本的 ggplot2 绘图函数以及基本的数据操作,这里我们直接给出代码(重点部分会给出解释)
- 绘图原始数据格式如下(部分)
- 数据处理
# Three rectangles described by their corner coordinates; each one is
# unbounded (Inf) on its right and top edges.
# NOTE(review): df_rect is not referenced by the plotting code visible in
# this file - confirm whether it is still needed.
df_rect <-
  tibble(
    xmin = c(-Inf, 2.46, 3.27),
    xmax = rep(Inf, 3),
    ymin = c(3, 2, 1),
    ymax = rep(Inf, 3)
  )
# Per-observation table with the bill ratio plus per-species summary
# statistics repeated on every row of that species.
df_peng_iqr <-
  df_penguins %>%
  # Bill ratio = bill length divided by bill depth.
  mutate(bill_ratio = bill_length_mm / bill_depth_mm) %>%
  # Drop rows where the ratio could not be computed.
  filter(!is.na(bill_ratio)) %>%
  group_by(species) %>%
  # mutate() rather than summarise(): the rows are kept and the group
  # statistics are attached to each of them.
  mutate(
    median = median(bill_ratio),
    q25 = quantile(bill_ratio, probs = 0.25),
    q75 = quantile(bill_ratio, probs = 0.75),
    n = n()
  ) %>%
  ungroup() %>%
  # Numeric position for each species, taken from the reversed factor
  # level order.
  mutate(species_num = as.numeric(fct_rev(species)))
- 数据可视化绘制
这里直接给出可视化绘制代码(都是基本的绘图函数)
# Composite plot of bill ratio per species, built from df_peng_iqr.
# The default aesthetics place layers at `species_num - .2`; the half-eye
# and the median guide line sit on `species_num` itself.
# Every layer, scale and theme call is chained with `+` so that it becomes
# part of `test_plot`.
# NOTE(review): `pal` is defined outside this snippet; darken()/lighten()
# presumably come from the colorspace package - confirm.
test_plot <-
  ggplot(df_peng_iqr, aes(bill_ratio, species_num - .2)) +
  # Dotted guide line from the left panel edge (-Inf) to each species'
  # median, using one summary row per species.
  geom_linerange(
    data = df_peng_iqr %>%
      group_by(species, species_num) %>%
      summarize(m = unique(median)),
    aes(
      xmin = -Inf,
      xmax = m,
      y = species_num,
      color = species
    ),
    inherit.aes = FALSE,
    linetype = "dotted",
    size = .7
  ) +
  # Boxplot layer drawn with width = 0.
  # NOTE(review): `color` is mapped twice on purpose (data mapping plus an
  # after_scale() adjustment); ggplot2 may warn about duplicated aesthetics.
  geom_boxplot(
    aes(
      color = species,
      color = after_scale(darken(color, .1, space = "HLS"))
    ),
    width = 0,
    size = .9
  ) +
  # Box from the 25% quantile to the median.
  geom_rect(
    aes(
      xmin = q25,
      xmax = median,
      ymin = species_num - .05,
      ymax = species_num - .35
    ),
    fill = "grey89"
  ) +
  # Box from the median to the 75% quantile, in a slightly darker grey.
  geom_rect(
    aes(
      xmin = q75,
      xmax = median,
      ymin = species_num - .05,
      ymax = species_num - .35
    ),
    fill = "grey79"
  ) +
  # Vertical edge of the box at the 25% quantile.
  geom_segment(
    aes(
      x = q25,
      xend = q25,
      y = species_num - .05,
      yend = species_num - .35,
      color = species,
      color = after_scale(darken(color, .05, space = "HLS"))
    ),
    size = .25
  ) +
  # Vertical edge of the box at the 75% quantile.
  geom_segment(
    aes(
      x = q75,
      xend = q75,
      y = species_num - .05,
      yend = species_num - .35,
      color = species,
      color = after_scale(darken(color, .05, space = "HLS"))
    ),
    size = .25
  ) +
  # Individual observations as semi-transparent "|" marks.
  geom_point(aes(color = species), shape = "|", size = 5, alpha = .4) +
  # Half-eye on the species row; fill is a lightened version of the
  # outline color.
  ggdist::stat_halfeye(
    aes(
      y = species_num,
      color = species,
      fill = after_scale(lighten(color, .5))
    ),
    shape = 18,
    point_size = 3,
    interval_size = 1.8,
    adjust = .5,
    .width = c(0, 1)
  ) +
  scale_x_continuous(
    limits = c(1.57, 3.7),
    breaks = seq(1.6, 3.6, by = .2),
    expand = c(.001, .001)
  ) +
  # Custom y axis: label the numeric positions 1:3 with species names.
  scale_y_continuous(
    limits = c(.55, NA),
    breaks = 1:3,
    labels = c("Gentoo", "Chinstrap", "Adélie"),
    expand = c(0, 0)
  ) +
  # Color mapping; guide = "none" hides the legend (replaces `guide = F`).
  scale_color_manual(
    values = pal,
    guide = "none"
  ) +
  # Fill mapping; guide = "none" hides the legend (replaces `guide = F`).
  scale_fill_manual(
    values = pal,
    guide = "none"
  ) +
  labs(
    x = "Bill ratio",
    y = NULL,
    title = "ggdist Exercise",
    caption = "Visualization by DataCharm"
  ) +
  # Theme customisation.
  theme(
    panel.grid.major = element_line(color = "grey92", size = .4),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.title.x = element_text(color = "grey30", margin = margin(t = 7)),
    axis.title.y = element_text(color = "grey30", margin = margin(r = 7)),
    axis.text = element_text(color = "grey50"),
    # y-axis labels use the palette in reverse order.
    # NOTE(review): assumes a font is registered as "IBMPSBold".
    axis.text.y = element_text(
      family = "IBMPSBold",
      color = rev(pal),
      size = 14,
      lineheight = .9
    ),
    axis.ticks = element_line(color = "grey92", size = .4),
    axis.ticks.length = unit(0, "lines"),
    plot.title = element_text(
      hjust = 0,
      color = "black",
      family = "IBMPSBold",
      size = 21,
      margin = margin(t = 10, b = 35)
    ),
    plot.caption = element_text(
      color = "grey50",
      size = 10,
      hjust = 1,
      family = "IBMPSBold",
      lineheight = 1.05,
      margin = margin(30, 0, 0, 0)
    ),
    # All four sides are spelled out; the original passed a length-4 vector
    # as the single `t` argument of margin().
    plot.margin = margin(20, 20, 20, 20)
  )
test_plot
可视化结果如下:
大家可以参考下主题(theme) 的设置方法,希望能够给你启发
总结
以上就是本期推文的内容,由于自己也是第一次接触ggdist包,好多函数理解的不是很透彻,大家可自行去官网查阅;而实例演示部分涉及到多个基本的ggplot2 绘图函数的绘制,希望能够给大家提供绘图灵感,能力有限,有错误的可以在读者讨论区 留言。