https://www.leansigmacorporation.com/box-plot-with-minitab/

一、ggplot绘制基础箱图

0、示例数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
library(ggplot2)
library(patchwork)

# Group labels work best as character strings; when the grouping column is
# numeric (as `dose` is here), convert it to a factor before plotting.
ToothGrowth[["dose"]] <- factor(ToothGrowth[["dose"]])
summary(ToothGrowth)
#       len        supp     dose   
# Min.   : 4.20   OJ:30   0.5:20  
# 1st Qu.:13.07   VC:30   1  :20  
# Median :19.25           2  :20  
# Mean   :18.81                   
# 3rd Qu.:25.27                   
# Max.   :33.90

1、基础绘图

1
2
3
4
5
# Two basic box plots of tooth length (len) against dose:
#   p1 - one box per dose, filled by dose
#   p2 - boxes within each dose split and filled by supplement type (supp)
p1 <- ggplot(ToothGrowth) +
  aes(x = dose, y = len, fill = dose) +
  geom_boxplot()
p2 <- ggplot(ToothGrowth) +
  aes(x = dose, y = len, fill = supp) +
  geom_boxplot()
# patchwork: place the two plots side by side
p1 | p2

2、离群点相关

1
2
3
4
5
6
# Outlier appearance: p1 draws outliers as small red points, p2 hides them.
p1 <- ggplot(ToothGrowth) +
  aes(x = dose, y = len) +
  geom_boxplot(outlier.size = 0.5, outlier.color = "red")
p2 <- ggplot(ToothGrowth) +
  aes(x = dose, y = len) +
  # alpha = 0 makes the outliers fully transparent, i.e. effectively not drawn
  geom_boxplot(outlier.alpha = 0)
p1 + p2

3、添加随机抖动点/whisker须线

1
2
3
4
5
6
7
# Box plot with the individual observations overlaid as jittered points.
# Every layer must be chained with a trailing `+`; without it R ends the
# expression after geom_boxplot() and the jitter layer is never added.
ggplot(ToothGrowth, aes(x = dose, y = len)) +
  geom_boxplot() +
  geom_jitter(color = "black", size = 0.8)

# Box plot with horizontal caps on the whisker ends. The "errorbar" layer
# computed by stat_boxplot() is added first so the boxes are drawn on top of it.
ggplot(ToothGrowth, aes(x = dose, y = len)) +
  stat_boxplot(geom = "errorbar", width = 0.2) +
  geom_boxplot()

image-20221105135017048

4、嵌套小提琴图

1
2
3
4
5
6
# Violin plots of len by dose:
#   p1 - violins only
#   p2 - violins with a narrow, semi-transparent box plot nested inside
p1 <- ggplot(ToothGrowth) +
  aes(x = dose, y = len, fill = dose) +
  geom_violin()
p2 <- ggplot(ToothGrowth) +
  aes(x = dose, y = len, fill = dose) +
  geom_violin() +
  geom_boxplot(color = "black", alpha = 0.5, width = 0.2)
p1 | p2

二、ggpubr进行两/多组比较

ggpubr包提供了组间比较的分析函数与可视化函数,本节主要参考自:http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/

0、示例数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
library(ggpubr)
library(patchwork)

# Group labels work best as character strings; when the grouping column is
# numeric (as `dose` is here), convert it to a factor before comparing groups.
ToothGrowth[["dose"]] <- factor(ToothGrowth[["dose"]])
summary(ToothGrowth)
#       len        supp     dose   
# Min.   : 4.20   OJ:30   0.5:20  
# 1st Qu.:13.07   VC:30   1  :20  
# Median :19.25           2  :20  
# Mean   :18.81                   
# 3rd Qu.:25.27                   
# Max.   :33.90

1、组间差异检验

1.1 两组间比较

进行两组间比较前需要先确定:(1)选择参数检验(如 t 检验)还是非参数检验(如 Wilcoxon 检验);(2)两组样本能否进行配对比较。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Two-group comparison of tooth length (len) between supplement types (supp)
# using ggpubr::compare_means(). The commented tables are the printed results.

# (1) Wilcoxon test (the default method)
compare_means(len ~ supp, data = ToothGrowth)
# # A tibble: 1 x 8
# .y.   group1 group2      p p.adj p.format p.signif method  
# <chr> <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
# 1 len   OJ     VC     0.0645 0.064 0.064    ns       Wilcoxon

# (2) t-test
compare_means(len ~ supp, data = ToothGrowth,
              method = "t.test")
# 1 len   OJ     VC     0.0606 0.061 0.061    ns       T-test

# (3) Change how the adjusted p-value is computed (the default is "holm")
compare_means(len ~ supp, data = ToothGrowth,
              p.adjust.method = "BH")
# 1 len   OJ     VC     0.0645 0.064 0.064    ns       Wilcoxon

# (4) Account for another variable: run the test within each level of dose
compare_means(len ~ supp, data = ToothGrowth,
              group.by = "dose")
# # A tibble: 3 x 9
# dose  .y.   group1 group2       p p.adj p.format p.signif method  
# <fct> <chr> <chr>  <chr>    <dbl> <dbl> <chr>    <chr>    <chr>   
# 1 0.5   len   OJ     VC     0.0232  0.046 0.023    *        Wilcoxon
# 2 1     len   OJ     VC     0.00403 0.012 0.004    **       Wilcoxon
# 3 2     len   OJ     VC     1       1     1.000    ns       Wilcoxon

# (5) Paired analysis.
# The samples must be listed in the same order within each group so that the
# rows pair up correctly.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
compare_means(len ~ supp, data = ToothGrowth,
              paired = TRUE)
# 1 len   OJ     VC     0.00431 0.0043 0.0043   **       Wilcoxon

# (6) Custom significance cutpoints and the symbols used to label them
compare_means(len ~ supp, data = ToothGrowth,
              symnum.args = list(cutpoints = c(0, 0.01, 0.05, 1),
                                 symbols = c("***", "*", "not")))
# 1 len   OJ     VC     0.0645 0.064 0.064    not      Wilcoxon

1.2 多分组比较

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# Multi-group comparison of tooth length (len) across the three dose levels.
# The commented tables are the printed results.

# (1) All pairwise comparisons
compare_means(formula = len ~ dose, data = ToothGrowth)
# .y.   group1 group2            p      p.adj p.format p.signif method  
# <chr> <chr>  <chr>         <dbl>      <dbl> <chr>    <chr>    <chr>   
# 1 len   0.5    1      0.00000702   0.000014   7.0e-06  ****     Wilcoxon
# 2 len   0.5    2      0.0000000841 0.00000025 8.4e-08  ****     Wilcoxon
# 3 len   1      2      0.000177     0.00018    0.00018  ***      Wilcoxon

# (2) Compare every other group against the "0.5" reference group
compare_means(formula = len ~ dose, data = ToothGrowth, ref.group = "0.5")
# .y.   group1 group2            p      p.adj p.format p.signif method  
# <chr> <chr>  <chr>         <dbl>      <dbl> <chr>    <chr>    <chr>   
# 1 len   0.5    1      0.00000702   0.000007   7.0e-06  ****     Wilcoxon
# 2 len   0.5    2      0.0000000841 0.00000017 8.4e-08  ****     Wilcoxon

# (3) Global test, parametric: one-way ANOVA
compare_means(formula = len ~ dose, data = ToothGrowth, method = "anova")
# .y.          p   p.adj p.format p.signif method
# <chr>    <dbl>   <dbl> <chr>    <chr>    <chr> 
# 1 len   9.53e-16 9.5e-16 9.5e-16  ****     Anova

# (4) Global test, non-parametric: Kruskal-Wallis
compare_means(formula = len ~ dose, data = ToothGrowth, method = "kruskal.test")
# .y.               p        p.adj p.format p.signif method        
# <chr>         <dbl>        <dbl> <chr>    <chr>    <chr>         
# 1 len   0.00000000148 0.0000000015 1.5e-09  ****     Kruskal-Wallis

2、绘制箱图

2.1 两组间比较

(1)不同比较方式

1
2
3
4
5
6
7
8
# Base box plot of len by supplement type, with jittered points overlaid.
# See ?ggboxplot for the available colour palettes.
p <- ggboxplot(
  ToothGrowth,
  x = "supp", y = "len",
  add = "jitter",
  color = "supp", palette = "aaas"
)
# Add the p-value of the between-group test
p1 <- p + stat_compare_means()                   # default: Wilcoxon
p2 <- p + stat_compare_means(method = "t.test")  # t-test
p1 + p2

(2)标签显示格式

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
# Three ways to control the p-value label added to the base plot `p`.

# Label position, given as relative ("npc") coordinates;
# absolute data coordinates are also possible, e.g. label.x = 1.5, label.y = 40
p1 <- p + stat_compare_means(label.x.npc = "left",
                             label.y.npc = "top")
# Label content: show significance stars instead of the numeric p-value.
# after_stat() replaces the `..p.signif..` notation, which is deprecated
# since ggplot2 3.4.0 and emits a warning.
p2 <- p + stat_compare_means(aes(label = after_stat(p.signif)))
# Custom significance cutpoints and symbols
p3 <- p + stat_compare_means(aes(label = after_stat(p.signif)),
                             symnum.args = list(cutpoints = c(0, 0.01, 0.05, 1),
                                                symbols = c("***", "*", "notsig")),
                             label.x = 1.5, label.y = 40)
p1 | p2 | p3

(3)配对分析

1
2
3
4
5
# 要确保相同样本在不同组的排列顺序相同
ggpaired(ToothGrowth, x = "supp", y = "len",
         color = "supp", palette = "jco",
         line.color = "gray", line.size = 0.4) +
  stat_compare_means(paired = TRUE)

(4)考虑其它分组变量

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
# Compare supplement types while accounting for a second grouping variable.

# Variant 1: one facet panel per dose, supp compared within each panel.
p <- ggboxplot(ToothGrowth, x = "supp", y = "len",
               color = "supp", palette = "jco",
               # add = "jitter",
               facet.by = "dose",
               # Spell out FALSE: `F` is an ordinary variable that can be
               # reassigned.
               short.panel.labs = FALSE)
p1 <- p + stat_compare_means(label = "p.format")
# p + stat_compare_means(label =  "p.signif", label.x = 1.5)

# Variant 2: dose on the x axis, supp compared within each dose level.
p <- ggboxplot(ToothGrowth, x = "dose", y = "len",
               color = "supp", palette = "jco")
p2 <- p + stat_compare_means(aes(group = supp))
# p + stat_compare_means(aes(group = supp), label = "p.signif")

# patchwork: stack the two plots vertically
p1 / p2

2.2 多组间比较

(1)整体差异检验(默认为 Kruskal-Wallis 检验,也可指定为方差分析)

1
2
3
4
5
6
7
8
# Global (all-groups) test across the three dose levels:
#   p1 - default method of stat_compare_means()
#   p2 - one-way ANOVA
p1 <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                palette = "jco", color = "dose") +
  stat_compare_means()

p2 <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                palette = "jco", color = "dose") +
  stat_compare_means(method = "anova")
p1 + p2

(2)两两比较

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
# Pairwise comparisons between dose levels, drawn as brackets above the boxes.

# p1: bracket heights chosen automatically
p1 <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                palette = "jco", color = "dose") +
  stat_compare_means(
    comparisons = list(c("0.5", "1"), c("1", "2"), c("0.5", "2"))
  )

# p2: explicit bracket heights plus a global p-value above them
p2 <- ggboxplot(ToothGrowth, x = "dose", y = "len",
                palette = "jco", color = "dose") +
  stat_compare_means(
    comparisons = list(c("0.5", "1"), c("1", "2"), c("0.5", "2")),
    label.y = c(29, 35, 40)  # height of each bracket label
  ) +
  # Global test across all groups, using the default method of
  # stat_compare_means() (no `method` is given, so this is not ANOVA)
  stat_compare_means(label.y = 45)

p1 | p2
1
2
3
4
5
6
# Compare every dose level against a single reference group ("0.5").
ggboxplot(ToothGrowth, x = "dose", y = "len",
          palette = "jco", color = "dose") +
  # Global p-value from a one-way ANOVA, placed at y = 40
  stat_compare_means(label.y = 40, method = "anova") +
  # Pairwise t-tests against the reference group, shown as significance stars
  stat_compare_means(method = "t.test", ref.group = "0.5",
                     label = "p.signif")