diff --git a/episodes/12-dplyr.Rmd b/episodes/12-dplyr.Rmd index 01f971eaf..60b1f52b2 100644 --- a/episodes/12-dplyr.Rmd +++ b/episodes/12-dplyr.Rmd @@ -281,12 +281,13 @@ lifeExp_bycountry %>% ::::::::::::::::::::::::: -The function `group_by()` allows us to group by multiple variables. Let's group by `year` and `continent`. +The function `group_by()` allows us to group by multiple variables. When you summarize with more than one grouping variable, `summarize()` drops only the last grouping level, so the result is still grouped by the remaining variables and dplyr prints a message saying so. Adding `ungroup()` afterwards removes that leftover grouping. Let's group by `year` and `continent`. ```{r} gdp_bycontinents_byyear <- gapminder %>% group_by(continent, year) %>% - summarize(mean_gdpPercap = mean(gdpPercap)) + summarize(mean_gdpPercap = mean(gdpPercap)) %>% + ungroup() ``` That is already quite powerful, but it gets even better! You're not limited to defining 1 new variable in `summarize()`. @@ -297,7 +298,8 @@ gdp_pop_bycontinents_byyear <- gapminder %>% summarize(mean_gdpPercap = mean(gdpPercap), sd_gdpPercap = sd(gdpPercap), mean_pop = mean(pop), - sd_pop = sd(pop)) + sd_pop = sd(pop)) %>% + ungroup() ``` ## count() and n() @@ -350,7 +352,8 @@ gdp_pop_bycontinents_byyear <- gapminder %>% mean_pop = mean(pop), sd_pop = sd(pop), mean_gdp_billion = mean(gdp_billion), - sd_gdp_billion = sd(gdp_billion)) + sd_gdp_billion = sd(gdp_billion)) %>% + ungroup() ``` ## Connect mutate with logical filtering: ifelse @@ -371,7 +374,8 @@ gdp_pop_bycontinents_byyear_above25 <- gapminder %>% mean_pop = mean(pop), sd_pop = sd(pop), mean_gdp_billion = mean(gdp_billion), - sd_gdp_billion = sd(gdp_billion)) + sd_gdp_billion = sd(gdp_billion)) %>% + ungroup() ## updating only if certain condition is fullfilled # for life expectations above 40 years, the gpd to be expected in the future is scaled @@ -379,7 +383,8 @@ gdp_future_bycontinents_byyear_high_lifeExp <- gapminder %>% mutate(gdp_futureExpectation = ifelse(lifeExp > 40, gdpPercap * 1.5, gdpPercap)) %>% group_by(continent, year) %>% 
summarize(mean_gdpPercap = mean(gdpPercap), - mean_gdpPercap_expected = mean(gdp_futureExpectation)) + mean_gdpPercap_expected = mean(gdp_futureExpectation)) %>% + ungroup() ``` ## Combining `dplyr` and `ggplot2`