1. 加载需要的R包
# Attach the plotting packages used by every snippet below.
library(ggstatsplot)
library(ggplot2)
2. 用到的数据
本期用到的数据:
# Work on a short alias of the `movies_long` example data, then preview it.
# NOTE(review): `movies_long` is expected to come from an attached package
# (ggstatsplot ships a dataset of that name) - confirm it is in scope.
dat <- movies_long
dat
3. 组间比较
3.1 初步绘制
# Baseline between-group comparison: `rating` for each `mpaa` category, with
# every other ggbetweenstats() option left at its default.
p1 <- ggbetweenstats(
  data = dat,
  x = mpaa,
  y = rating
)
p1
3.2 标记outlier
参数 type 用于选择统计方法,可选值包括:
✅ "p" → parametric
✅ "np" → non-parametric
✅ "r" → robust
✅ "bf" → Bayesian
# Same comparison with the robust test selected and outliers flagged, each
# flagged point labelled with the value of the `title` column.
# NOTE(review): the `outlier.*` arguments depend on the installed ggstatsplot
# version - confirm they are still supported before running.
p2 <- ggbetweenstats(
  data = dat,
  x = mpaa,
  y = rating,
  # Long-form spellings of the options:
  # "parametric", "nonparametric", "robust", "bayes"
  type = "r",
  outlier.label = title,
  outlier.tagging = TRUE
)
p2
3.3 当然你也可以选择画boxplot
更改 plot.type 参数即可
# Same comparison drawn as box plots only.
# `plot.type` choices: box, boxviolin, violin
p3 <- ggbetweenstats(
  data = dat,
  x = mpaa,
  y = rating,
  plot.type = "box"
)
p3
3.4 只画violinplot
# Same comparison drawn as violin plots only.
# `plot.type` choices: box, boxviolin, violin
p4 <- ggbetweenstats(
  data = dat,
  x = mpaa,
  y = rating,
  plot.type = "violin"
)
p4
3.5 把这几个图组合到一起
ps:你也可以选择别的拼图包,如 cowplot 等。
# Arrange the four plots on a two-row grid with a shared title and caption.
combine_plots(
  plotlist = list(p1, p2, p3, p4),
  annotation.args = list(
    caption = "Source: movies_long",
    title = "Comparison of rating among different mpaa"
  ),
  plotgrid.args = list(nrow = 2)
)
4. 复杂分组间比较
用到的函数是 grouped_ggbetweenstats。
4.1 比较不同genre的mpaa各组的rating
# One ggbetweenstats() panel per `genre`, each comparing `rating` across the
# `mpaa` categories, assembled into a single 3 x 3 grid.
# NOTE(review): the `outlier.*` arguments depend on the installed ggstatsplot
# version - confirm they are still supported before running.
grouped_ggbetweenstats(
  data = dat,
  ## arguments relevant for ggbetweenstats
  x = mpaa,
  y = rating,
  grouping.var = genre,
  xlab = "mpaa",
  ylab = "rating",
  ## display only significant pairwise comparisons
  pairwise.display = "significant",
  ## adjust p-values for multiple tests using this method
  p.adjust.method = "fdr",
  # ggtheme = ggthemes::theme_tufte(),
  package = "ggsci",
  palette = "default_jco",
  outlier.tagging = TRUE,
  outlier.label = title,
  ## arguments relevant for combine_plots
  annotation.args = list(title = "Comparison of rating among different mpaa"),
  plotgrid.args = list(ncol = 3, nrow = 3)
)
4.2 一次性应用不同分析方法
上面这种方法虽然很方便,但我们有可能想在不同的亚组中用不同的统计方法,这个时候可以利用 purrr 包进行批量绘制。
## Split the data into a list with one data frame per genre. Because the
## dataset is large, only three genres are kept for the next step.
## Plain function calls are used instead of `%>%`, which only works when a
## package exporting it (e.g. magrittr or dplyr) is attached; `local()` keeps
## the intermediate data frame out of the workspace.
dat_list <- local({
  kept <- dplyr::filter(dat, genre %in% c("Drama", "Action", "Comedy"))
  split(kept, f = kept$genre, drop = TRUE)
})
## Inspect the list: number of groups and their names
length(dat_list)
names(dat_list)
## Draw one plot per genre with `purrr::pmap()`. Each entry of `.l` is either a
## single value shared by all plots or a list with one item per element of
## `dat_list`, matched by position.
## NOTE(review): several arguments (`outlier.*`, `k`) depend on the installed
## ggstatsplot version - confirm they are still supported before running.
plot_list <- purrr::pmap(
  .l = list(
    data = dat_list,
    x = "mpaa",
    y = "rating",
    outlier.tagging = TRUE,
    outlier.label = "title",
    outlier.label.args = list(
      list(size = 3, color = "#56B4E9"),
      list(size = 2.5, color = "#009E73"),
      list(size = 3.5, color = "#F0E442")
    ),
    xlab = "mpaa",
    ylab = "rating",
    ## Titles come from the list names so each panel is labelled with the genre
    ## it actually contains: `split()` orders groups by factor level (or sorted
    ## value), not by the order the genres were written in the filter.
    title = as.list(names(dat_list)),
    type = list("r", "bf", "np"),
    pairwise.display = list("s", "ns", "all"),
    p.adjust.method = list("hommel", "bonferroni", "BH"),
    conf.level = list(0.99, 0.95, 0.90),
    k = list(1, 2, 3),
    effsize.type = list(
      NULL,
      "partial_omega",
      "partial_eta"
    ),
    plot.type = list("box", "boxviolin", "violin"),
    package = list("nord", "ochRe", "awtools"),
    palette = list("aurora", "parliament", "bpalette"),
    ggtheme = list(
      ggthemes::theme_stata(),
      ggplot2::theme_classic(),
      ggthemes::theme_fivethirtyeight()
    )
  ),
  .f = ggbetweenstats
)
最后进行可视化
# Lay the per-genre plots out side by side in three columns; the overall
# title is deliberately left empty.
combine_plots(
  plotlist = plot_list,
  annotation.args = list(title = ""),
  plotgrid.args = list(ncol = 3)
)
<center>最后祝大家早日不卷!~</center>
点个在看吧各位~ ✐.ɴɪᴄᴇ ᴅᴀʏ 〰