tidyr 是 tidyverse 系列的组成工具包之一，主要功能侧重于表格的重塑与整理（reshaping and organizing）。下面简要学习其中几个比较常用的函数。
1. 表格长宽转换#
功能类似于 reshape2 包。
1
2
3
4
5
6
7
8
9
10
11
|
# Example wide table: one row per sample, one column per time point.
df <- data.frame(
  id    = paste0("sample_", seq_len(3)),
  time1 = c(10, 20, 30),
  time2 = c(15, 25, 35),
  time3 = c(20, 30, 40)
)
print(df)
#         id time1 time2 time3
# 1 sample_1    10    15    20
# 2 sample_2    20    25    30
# 3 sample_3    30    35    40
|
1.1 pivot_longer()#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
# Attach tidyr: it provides pivot_longer(), starts_with() and the %>% pipe
# used by the examples in this document.
library(tidyr)

# Wide -> long: stack every column whose name starts with "time" into a
# pair of name/value columns; `id` is kept as the row identifier.
df_long <- df %>%
  pivot_longer(
    cols = starts_with("time"),
    names_to = "time_point", # receives the former column names
    values_to = "value"      # receives the former cell values
  )
# head() prints only the first 6 rows of the reshaped table.
head(df_long)
# # A tibble: 6 × 3
#   id       time_point value
#   <chr>    <chr>      <dbl>
# 1 sample_1 time1         10
# 2 sample_1 time2         15
# 3 sample_1 time3         20
# 4 sample_2 time1         20
# 5 sample_2 time2         25
# 6 sample_2 time3         30
|
1.2 pivot_wider()#
1
2
3
4
5
6
7
8
9
|
# Long -> wide: spread the values of `time_point` back out into one column
# each, filling the cells from `value`.
df_wide <- pivot_wider(
  df_long,
  names_from = "time_point",
  values_from = "value"
)
print(df_wide)
# # A tibble: 3 × 4
#   id       time1 time2 time3
#   <chr>    <dbl> <dbl> <dbl>
# 1 sample_1    10    15    20
# 2 sample_2    20    25    30
# 3 sample_3    30    35    40
|
2. 表格单列分割#
1
2
3
4
5
6
7
8
9
|
# Example table whose `info` column packs two fields joined by "_".
df <- data.frame(
  id   = seq_len(3),
  info = c("sp1_2010", "sp2_2020", "sp3_2025")
)
print(df)
#   id     info
# 1  1 sp1_2010
# 2  2 sp2_2020
# 3  3 sp3_2025
|
2.1 separate_longer_delim()#
1
2
3
4
5
6
7
8
9
|
# Split `info` on "_" and put each piece on its own row; the other columns
# (`id`) are repeated for every piece.
separate_longer_delim(df, cols = info, delim = "_")
#   id info
# 1  1  sp1
# 2  1 2010
# 3  2  sp2
# 4  2 2020
# 5  3  sp3
# 6  3 2025
|
2.2 separate_wider_delim()#
1
2
3
4
5
6
7
8
|
# Split `info` on "_" into two new columns named by `names`; the original
# `info` column is replaced by them.
separate_wider_delim(
  df,
  cols = info,
  delim = "_",
  names = c("col_1", "col_2")
)
# # A tibble: 3 × 3
#      id col_1 col_2
#   <int> <chr> <chr>
# 1     1 sp1   2010
# 2     2 sp2   2020
# 3     3 sp3   2025
|
3. expand()排列组合#
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
# Two equal-length vectors; note that 1999 appears twice in `years`.
samples <- c("sp1", "sp2", "sp3")
years <- c(1999, 1999, 2020)
df <- data.frame(col_1 = samples, col_2 = years)
print(df)
#   col_1 col_2
# 1   sp1  1999
# 2   sp2  1999
# 3   sp3  2020

# expand() builds every combination of the unique values of the given
# columns, so the duplicated 1999 contributes only once.
tidyr::expand(df, col_1, col_2)
# # A tibble: 6 × 2
#   col_1 col_2
#   <chr> <dbl>
# 1 sp1    1999
# 2 sp1    2020
# 3 sp2    1999
# 4 sp2    2020
# 5 sp3    1999
# 6 sp3    2020
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
# Same idea as above, but the two input vectors have different lengths.
samples <- c("sp1", "sp2", "sp3")
years <- c(1999, 2020)
# data.frame() needs columns of equal length, so pad `years` with NA up to
# the length of `samples` (derived, not hard-coded, so it follows `samples`).
length(years) <- length(samples)
years
# [1] 1999 2020   NA
df <- data.frame(
  col_1 = samples,
  col_2 = years
)
# expand() treats the padding NA as one more value of col_2, so the rows
# containing NA are dropped afterwards. This leaves 6 rows: each of
# sp1/sp2/sp3 paired with 1999 and 2020.
# Note: tidyr::crossing(col_1 = samples, col_2 = years) accepts vectors of
# unequal length directly and avoids the NA padding.
df %>%
  tidyr::expand(col_1, col_2) %>%
  na.omit()
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
# Base R counterparts that work directly on vectors of any length.
samples <- c("sp1", "sp2", "sp3")
years <- c(1999, 2020)

# expand.grid(): Cartesian product of the inputs; the first argument varies
# fastest and unnamed inputs become columns Var1, Var2, ...
expand.grid(samples, years)
#   Var1 Var2
# 1  sp1 1999
# 2  sp2 1999
# 3  sp3 1999
# 4  sp1 2020
# 5  sp2 2020
# 6  sp3 2020

# combn(): all ways of choosing 2 elements from `samples`; each column of
# the returned matrix is one combination.
combn(samples, 2)
#      [,1]  [,2]  [,3]
# [1,] "sp1" "sp1" "sp2"
# [2,] "sp2" "sp3" "sp3"
|