Summarizing with dplyr

Packages

library(tidyverse)

Load iris data set

# Make the built-in iris data frame available and preview its first six rows.
data(iris)
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

summarise: to summarize columns that you name explicitly (here, a single column)

# Group the rows by species, then reduce each group to its mean sepal length.
# The summary is left unnamed, so the column is labelled with the expression.
iris %>%
  group_by(Species) %>%
  summarise(mean(Sepal.Length))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
##   Species    `mean(Sepal.Length)`
##   <fct>                     <dbl>
## 1 setosa                     5.01
## 2 versicolor                 5.94
## 3 virginica                  6.59

summarise_all: to summarize all columns

# Apply mean() to every non-grouping column; the list name "mean" is appended
# to each column name as a suffix.
iris %>%
  group_by(Species) %>%
  summarise_all(.funs = list(mean = mean))
## # A tibble: 3 x 5
##   Species   Sepal.Length_mean Sepal.Width_mean Petal.Length_me… Petal.Width_mean
##   <fct>                 <dbl>            <dbl>            <dbl>            <dbl>
## 1 setosa                 5.01             3.43             1.46            0.246
## 2 versicol…              5.94             2.77             4.26            1.33 
## 3 virginica              6.59             2.97             5.55            2.03

summarise_at: to summarize only certain columns

# Apply mean() only to the two sepal measurements selected with vars().
iris %>%
  group_by(Species) %>%
  summarise_at(
    .vars = vars(Sepal.Length, Sepal.Width),
    .funs = list(mean = mean)
  )
## # A tibble: 3 x 3
##   Species    Sepal.Length_mean Sepal.Width_mean
##   <fct>                  <dbl>            <dbl>
## 1 setosa                  5.01             3.43
## 2 versicolor              5.94             2.77
## 3 virginica               6.59             2.97

summarise_if: to summarize only the columns that satisfy a condition (here, numeric columns)

# `funs()` has been deprecated since dplyr 0.8.0, so the functions are passed
# as a named list instead; the list name ("mean") becomes the column suffix.
iris %>%
  group_by(Species) %>%
  summarise_if(
    .predicate = is.numeric,  # summarise only the numeric columns
    .funs = list(mean = mean)
  )
## # A tibble: 3 x 5
##   Species   Sepal.Length_mean Sepal.Width_mean Petal.Length_me… Petal.Width_mean
##   <fct>                 <dbl>            <dbl>            <dbl>            <dbl>
## 1 setosa                 5.01             3.43             1.46            0.246
## 2 versicol…              5.94             2.77             4.26            1.33 
## 3 virginica              6.59             2.97             5.55            2.03

Pass multiple functions: supply several named functions to compute more than one summary per column

# Pass several functions as a named list (`funs()` is deprecated since
# dplyr 0.8.0). Each numeric column gets one output column per function,
# suffixed with the corresponding list name ("mean" or "Sd").
iris %>%
  group_by(Species) %>%
  summarise_if(
    .predicate = is.numeric,
    .funs = list(mean = mean, Sd = sd)
  )
## # A tibble: 3 x 9
##   Species Sepal.Length_me… Sepal.Width_mean Petal.Length_me… Petal.Width_mean
##   <fct>              <dbl>            <dbl>            <dbl>            <dbl>
## 1 setosa              5.01             3.43             1.46            0.246
## 2 versic…             5.94             2.77             4.26            1.33 
## 3 virgin…             6.59             2.97             5.55            2.03 
## # … with 4 more variables: Sepal.Length_Sd <dbl>, Sepal.Width_Sd <dbl>,
## #   Petal.Length_Sd <dbl>, Petal.Width_Sd <dbl>
Avatar
Thiyanga S. Talagala
PhD in Statistics

Related