使用 dplyr 套件整理資料

郭耀仁

2017-07-02

安裝與載入

# Install dplyr only when it is not already available, so that re-running this
# script does not download and reinstall the package every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
# Attach dplyr so its verbs can be called without the dplyr:: prefix.
library(dplyr)

不同的函數

函數 用途
filter() 觀測值(列)選擇
select() 變數(欄)選擇
mutate() 新增變數(欄)
arrange() 排序
summarise() 聚合
group_by() 分組

filter()

# Load the saved objects from the remote .RData file into the current
# environment; the call below expects this to provide straw_hat_df.
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Keep only the observations (rows) whose gender equals "Female".
filter(.data = straw_hat_df, gender == "Female")

filter() (2)

# Load the saved objects from the remote .RData file into the current
# environment; the call below expects this to provide straw_hat_df.
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Conditions passed as separate arguments are combined with a logical AND:
# keep the rows where gender is "Female" and age is at least 30.
filter(
  straw_hat_df,
  gender == "Female",
  age >= 30
)

select()

# Load the saved objects from the remote .RData file into the current
# environment; the call below expects this to provide straw_hat_df.
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Keep only the name and gender variables (columns), in that order.
select(.data = straw_hat_df, name, gender)

使用 %>% 運算子

# Select the female crew members, but return only the name variable
load(url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# %>% passes the value on its left as the first argument of the call on its
# right, so the steps read top to bottom: filter the rows, then pick a column.
straw_hat_df %>%
  filter(gender == "Female") %>%
  select(name)

mutate()

# Load the saved objects from the remote .RData file into the current
# environment; the call below expects this to provide straw_hat_df.
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Add a birth_year column: the current calendar year (taken from the system
# date) minus the age recorded in each row.
mutate(
  .data = straw_hat_df,
  birth_year = as.integer(format(Sys.Date(), "%Y")) - age
)

arrange()

# Load the saved objects from the remote .RData file; the pipeline below
# expects this to provide straw_hat_df.
load(url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Derive birth_year as the current calendar year minus age, then sort the rows
# by birth_year (arrange() sorts in ascending order by default).
straw_hat_df %>%
  mutate(birth_year = as.integer(format(Sys.Date(), "%Y")) - age) %>%
  arrange(birth_year)
  # For descending order, use this as the last step instead:
  # arrange(desc(birth_year))

summarise()

# Load the saved objects from the remote .RData file into the current
# environment; the pipeline below expects this to provide straw_hat_df.
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Collapse the whole table into a single row holding the mean of age.
straw_hat_df %>%
  summarise(mean_age = mean(age))

group_by()

# Load the saved objects from the remote .RData file; the pipeline below
# expects this to provide straw_hat_df.
load(url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Split the rows into groups by gender, then collapse each group into one row
# holding the mean of age for that group.
straw_hat_df %>%
  group_by(gender) %>%
  summarise(mean_age = mean(age))

join 系列函數

種類 函數
內部連結 inner_join()
左外部連結 left_join()
右外部連結 right_join()
全外部連結 full_join()

inner_join()

library(dplyr)

# Left table: straw_hat_df
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Right table: straw_hat_devil_fruit
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_devil_fruit.RData"))

# Inner join: keep only the rows that have a match in both tables.
# No `by` is supplied, so dplyr joins on the column names the two tables share.
joined_df <- inner_join(x = straw_hat_df, y = straw_hat_devil_fruit)

left_join()

library(dplyr)

# Left table: straw_hat_df
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Right table: straw_hat_devil_fruit
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_devil_fruit.RData"))

# Left outer join: keep every row of the left table, filling the right table's
# columns with NA where there is no match.
# No `by` is supplied, so dplyr joins on the column names the two tables share.
joined_df <- left_join(x = straw_hat_df, y = straw_hat_devil_fruit)

right_join()

library(dplyr)

# Left table: straw_hat_df
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Right table: straw_hat_devil_fruit
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_devil_fruit.RData"))

# Right outer join: keep every row of the right table, filling the left table's
# columns with NA where there is no match.
# No `by` is supplied, so dplyr joins on the column names the two tables share.
joined_df <- right_join(x = straw_hat_df, y = straw_hat_devil_fruit)

full_join()

library(dplyr)

# Left table: straw_hat_df
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_df.RData"))
# Right table: straw_hat_devil_fruit
load(file = url("https://storage.googleapis.com/r_rookies/straw_hat_devil_fruit.RData"))

# Full outer join: keep every row of both tables, filling with NA wherever one
# side has no match.
# No `by` is supplied, so dplyr joins on the column names the two tables share.
joined_df <- full_join(x = straw_hat_df, y = straw_hat_devil_fruit)