Andee Kaplan
1
2
library(tidyverse)
library(reshape2)

1、matrix

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
# Build a reproducible 10 x 4 matrix of rounded random scores:
# one row per student (Stu1..Stu10), one column per subject (Subject-A..D).
set.seed(123)
scores_mt <- round(rnorm(n = 40, mean = 80, sd = 10))
dim(scores_mt) <- c(10, 4)
dimnames(scores_mt) <- list(
  paste0("Stu", seq_len(10)),
  paste0("Subject-", LETTERS[seq_len(4)])
)
class(scores_mt)
# [1] "matrix" "array"
head(scores_mt)
#      Subject-A Subject-B Subject-C Subject-D
# Stu1        74        92        69        84
# Stu2        78        84        78        77
# Stu3        96        84        70        89
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
## (1) Wide to long
# Melt the score matrix (scores_mt) into long form: one row per cell.
# The generated Var1/Var2 columns hold the matrix row and column names.
reshaped <- melt(scores_mt,
                 value.name = "Score")
head(reshaped)
#   Var1      Var2 Score
# 1 Stu1 Subject-A    74
# 2 Stu2 Subject-A    78
# 3 Stu3 Subject-A    96
## Var1 --- rownames
## Var2 --- colnames

## (2) Long to wide (restore)
# value.var is named explicitly so dcast() does not have to guess which
# column holds the values.
reshaped %>%
  dcast(Var1 ~ Var2, value.var = "Score") %>%
  head()
#   Var1 Subject-A Subject-B Subject-C Subject-D
# 1 Stu1        74        92        69        84
# 2 Stu2        78        84        78        77
# 3 Stu3        96        84        70        89

2、data.frame

2.1 简单

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
# Convert the score matrix to a data.frame and move its row names into a
# regular first column called "Name".
scores_df <- tibble::rownames_to_column(
  as.data.frame(scores_mt),
  var = "Name"
)
class(scores_df)
# [1] "data.frame"
head(scores_df)
#   Name Subject-A Subject-B Subject-C Subject-D
# 1 Stu1        74        92        69        84
# 2 Stu2        78        84        78        77
# 3 Stu3        96        84        70        89
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
## (1) Wide to long
# Keep "Name" as the identifier column; every other column is stacked into
# Subject (former column name) / Score (cell value) pairs.
# id.vars is spelled out in full rather than relying on partial matching
# of `id`.
reshaped <- scores_df %>%
  melt(id.vars = "Name",
       variable.name = "Subject",
       value.name = "Score")
head(reshaped)
#   Name   Subject Score
# 1 Stu1 Subject-A    74
# 2 Stu2 Subject-A    78
# 3 Stu3 Subject-A    96

## (2) Long to wide (restore)
# Note: as the output below shows, the restored rows come back sorted by
# Name (Stu1, Stu10, Stu2, ...), not in the original row order.
reshaped %>%
  dcast(Name ~ Subject,
        value.var = "Score") %>%
  head()
#    Name Subject-A Subject-B Subject-C Subject-D
# 1  Stu1        74        92        69        84
# 2 Stu10        76        75        93        76
# 3  Stu2        78        84        78        77

2.2 复杂

1
2
3
4
5
6
7
8
# Add two annotation columns right after "Name" (i.e. before column 2):
# an alternating class label and a rounded random age.
# The ages are drawn from the RNG without reseeding in this snippet.
scores_df_Anno <- dplyr::mutate(
  scores_df,
  Class = paste0("Class", rep(c("01", "02"), times = 5)),
  Age = round(rnorm(n = 10, mean = 20, sd = 1)),
  .before = 2
)
head(scores_df_Anno)
#   Name   Class Age Subject-A Subject-B Subject-C Subject-D
# 1 Stu1 Class01  20        74        92        69        84
# 2 Stu2 Class02  20        78        84        78        77
# 3 Stu3 Class01  20        96        84        70        89
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
## (1) Wide to long
# Name, Class and Age are identifier columns and are repeated on every
# output row; the subject columns are stacked into Subject / Score pairs.
# id.vars is spelled out in full rather than relying on partial matching
# of `id`.
reshaped <- scores_df_Anno %>%
  melt(id.vars = c("Name", "Class", "Age"),
       variable.name = "Subject",
       value.name = "Score")
head(reshaped)
#   Name   Class Age   Subject Score
# 1 Stu1 Class01  20 Subject-A    74
# 2 Stu2 Class02  20 Subject-A    78
# 3 Stu3 Class01  20 Subject-A    96

## (2) Long to wide (restore)
# All identifier columns go on the left-hand side of the formula so they
# are carried through to the wide result.
reshaped %>%
  dcast(Name + Class + Age ~ Subject,
        value.var = "Score") %>%
  head()
#    Name   Class Age Subject-A Subject-B Subject-C Subject-D
# 1  Stu1 Class01  20        74        92        69        84
# 2 Stu10 Class02  21        76        75        93        76
# 3  Stu2 Class02  20        78        84        78        77

3、list

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
# A small named list: one numeric vector of two values per student.
scores_list <- setNames(
  list(c(1, 2), c(3, 4), c(5, 6)),
  c("Stu1", "Stu2", "Stu3")
)
# Contents of scores_list:
# $Stu1
# [1] 1 2
# 
# $Stu2
# [1] 3 4
# 
# $Stu3
# [1] 5 6

# Melting a list stacks all values into one column; L1 records the name of
# the list element each value came from.
melt(scores_list)
#   value   L1
# 1     1 Stu1
# 2     2 Stu1
# 3     3 Stu2
# 4     4 Stu2
# 5     5 Stu3
# 6     6 Stu3