The gapminder data set contains demographic statistics popularized by Hans Rosling’s TED talks.
library(gapminder)
gapminder
Which countries had the fastest-growing GDPs between 1952 and 2007?
In R, you manipulate data by passing the data to functions.
# Pass a number and a named argument to a function: round to two decimals.
round(1234.567, digits = 2)
# Pass a whole data set to a function: count its rows.
nrow(gapminder)
The tidyverse is a collection of R packages that contain functions. You must load the packages to use the functions.
## install.packages("tidyverse")
library("tidyverse")
Tidyverse functions are designed to be:
# Print the data set unchanged, for comparison with the sorted output below.
gapminder
# Sort the rows by population, largest first.
arrange(gapminder, desc(pop))
# Begin with a plain copy of the data under a new name and print it ...
gapminder2007 <- gapminder
gapminder2007
# ... then overwrite that copy with only the rows where year equals 2007.
gapminder2007 <- filter(gapminder, year == 2007)
# Sort the 2007 rows by population, largest first.
arrange(gapminder2007, desc(pop))
Use the pipe operator (%>%) to compose tidyverse functions.
# Two-step version: store the filtered rows in an intermediate variable,
# then sort that variable by population (largest first).
gapminder2007 <- filter(gapminder, year == 2007)
arrange(gapminder2007, desc(pop))

# Piped version of the same two steps: %>% hands the result on its left to
# the function on its right as the first argument, so no intermediate
# variable is needed.
gapminder %>%
  filter(year == 2007) %>%
  arrange(desc(pop))

# The untouched table, for reference.
gapminder

# Life expectancy in 2007, highest first, keeping only the country and
# lifeExp columns.
gapminder %>%
  filter(year == 2007) %>%
  arrange(desc(lifeExp)) %>%
  select(country, lifeExp)
# The untouched table, for reference.
gapminder

# Step 1: add a gdp column, computed as population times GDP per capita.
gapminder %>%
  mutate(gdp = pop * gdpPercap)

gapminder %>%
  mutate(gdp = pop * gdpPercap)

# Step 2: collapse the whole table to a single row holding the largest gdp.
gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  summarise(max_gdp = max(gdp))

gapminder %>%
  mutate(gdp = pop * gdpPercap)

# Step 3: summarise with the first and last gdp values instead. Without
# grouping these are the first and last rows of the entire table.
gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  summarise(first_gdp = first(gdp), last_gdp = last(gdp))

gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  summarise(first_gdp = first(gdp), last_gdp = last(gdp))

# Step 4: group by country first, so the summary yields one row per country
# with that country's first and last gdp values.
gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  group_by(country) %>%
  summarise(first_gdp = first(gdp), last_gdp = last(gdp))
# Per-country first and last gdp, with the columns named after years.
# NOTE(review): the names assume each country's first row is 1952 and its
# last row is 2007, i.e. rows are in ascending year order -- confirm before
# reusing on reordered data.
gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  group_by(country) %>%
  summarise(gdp1952 = first(gdp), gdp2007 = last(gdp))

# Rank countries by annual GDP growth rate (cagr), in percent.
gapminder %>%
  mutate(gdp = pop * gdpPercap) %>%
  group_by(country) %>%
  summarise(gdp1952 = first(gdp), gdp2007 = last(gdp)) %>%
  # 55 is the number of years from 1952 to 2007; * 100 converts to percent.
  mutate(cagr = ((gdp2007 / gdp1952)^(1 / 55) - 1) * 100) %>%
  arrange(desc(cagr)) %>%
  select(country, cagr)
Each tidyverse function expects and returns the same type of data: tidy data. A tabular data set is tidy if and only if:
Let’s focus on the 10 biggest economies (in 1952). What are they?
# The untouched table, for reference.
gapminder

# Rank countries by total GDP in 1952, largest first.
gapminder %>%
  filter(year == 1952) %>%
  mutate(gdp = pop * gdpPercap) %>%
  arrange(desc(gdp)) %>%
  select(country, gdp)

# Hand-typed list of ten country names used to filter the later examples.
top_10 <- c(
  "United States", "United Kingdom", "Germany", "France", "Japan",
  "Italy", "China", "India", "Canada", "Brazil"
)
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
# The untouched table, for reference.
gapminder

# Line chart of total GDP over time, one colored line per top_10 country.
gapminder %>%
  filter(country %in% top_10) %>%
  mutate(gdp = pop * gdpPercap) %>%
  # The piped data frame becomes ggplot()'s data; layers are added with +.
  ggplot() +
  geom_line(mapping = aes(x = year, y = gdp, color = country))
Let’s scale the data within each country to make growth easier to compare.
# Starting point: total GDP for the top_10 countries.
gapminder %>%
  filter(country %in% top_10) %>%
  mutate(gdp = pop * gdpPercap)

# Divide each country's GDP by that country's first GDP value, so every line
# starts at 1 and the plot shows growth relative to the starting point.
gapminder %>%
  filter(country %in% top_10) %>%
  mutate(gdp = pop * gdpPercap) %>%
  group_by(country) %>%
  # Inside a grouped mutate(), first(gdp) is evaluated per country.
  mutate(scaled_gdp = gdp / first(gdp)) %>%
  ggplot() +
  geom_line(mapping = aes(x = year, y = scaled_gdp, color = country))

# Annual GDP growth rate (cagr, in percent) for the top_10 countries,
# fastest growing first.
gapminder %>%
  filter(country %in% top_10) %>%
  mutate(gdp = pop * gdpPercap) %>%
  group_by(country) %>%
  summarise(start = first(gdp), end = last(gdp)) %>%
  # 55 is the number of years from 1952 to 2007; * 100 converts to percent.
  mutate(cagr = ((end / start)^(1 / 55) - 1) * 100) %>%
  arrange(desc(cagr)) %>%
  select(country, cagr)
# Column chart of the annual GDP growth rate (cagr, in percent) for each
# top_10 country.
gapminder %>%
  filter(country %in% top_10) %>%
  mutate(gdp = pop * gdpPercap) %>%
  group_by(country) %>%
  summarise(start = first(gdp), end = last(gdp)) %>%
  # 55 is the number of years from 1952 to 2007; * 100 converts to percent.
  mutate(cagr = ((end / start)^(1 / 55) - 1) * 100) %>%
  # NOTE(review): sorting the rows here presumably does not change the order
  # of the bars, which ggplot2 takes from the x variable itself -- confirm.
  arrange(desc(cagr)) %>%
  select(country, cagr) %>%
  ggplot() +
  geom_col(mapping = aes(x = country, y = cagr))
library(forcats)
# Horizontal bar chart of the compound annual growth rate (CAGR) in GDP for
# each top_10 country, with the bars ordered by growth rate.
gapminder %>%
  filter(country %in% top_10) %>%
  mutate(gdp = pop * gdpPercap) %>%
  group_by(country) %>%
  # NOTE(review): first()/last() take each country's first and last rows, so
  # this assumes rows are in ascending year order (1952 ... 2007) -- confirm.
  summarise(start = first(gdp), end = last(gdp)) %>%
  # CAGR = (end / start)^(1 / years) - 1, with years = 2007 - 1952 = 55;
  # * 100 expresses it as a percentage.
  mutate(cagr = ((end / start)^(1 / 55) - 1) * 100) %>%
  ggplot() +
  # fct_reorder() orders the country levels by cagr, which is what sets the
  # bar order; sorting the rows beforehand is unnecessary.
  geom_col(mapping = aes(x = fct_reorder(country, cagr), y = cagr)) +
  labs(
    x = "Country",
    y = "Compound Annual Growth Rate in GDP (1952-2007)"
  ) +
  # Flip the axes so the country names read horizontally.
  coord_flip()