################################################################################
##                                                                            ##
##                           BStats - LAB 2                                   ##
##                      Graph the Data with ggplot                            ##
##                                                                            ##
##                         Author: Miguel Salema                              ##
##                                                                            ##
################################################################################

# This class is about a package designed to generate beautiful, 
#   professionally crafted graphs in R. 
# At the outset, it must be noted that the few sections below are far 
#   from doing justice to this amazing package. 
# Hence, these notes merely aim at introducing you to the general approach 
#   of the package and inviting you to further explore its capabilities.

# The main principles:
# 1. The data must be tidy: one observation per row, one variable per column;
# 2. Map columns from the dataset to aesthetics;
# 3. Build in layers.

# The core of a graph is a **layer**. 
# In general, the elements of a layer share the same data. 
# There are four elements in a layer:
# - `data` and `aes`-thetic `mapping`-s of the variables of the data,
# - a `geom`-etric object,
# - a `stat`-istical transformation,
# - a `position` adjustment.

# For further information on ggplot2 see 
# https://ggplot2.tidyverse.org/articles/ggplot2.html

# To plot we need:
# 1. A dataset;
# 2. One mapping;
# 3. Some layers.


##-- Setup ---------------------------------------------------------------------

# Install the 'gapminder' package only when it is not already available, so
#   that re-running the script does not trigger a new download every time.
if (!requireNamespace("gapminder", quietly = TRUE)) {
  install.packages("gapminder")
}

# Load necessary libraries
library(tidyverse)
library(gapminder)

# Load the dataset into the workspace and print it to the console
data("gapminder")
gapminder

# Open the dataset in the data viewer.
# The viewer may be unavailable when the script is run non-interactively
#   (e.g. with Rscript), so it is only opened in an interactive session.
if (interactive()) {
  View(gapminder)
}

##-- The Mapping ---------------------------------------------------------------

# Data only, no mapping and no layer: a blank canvas
ggplot(data = gapminder)

# Map a factor column (continent) to the x aesthetic
ggplot(data = gapminder,
       mapping = aes(x = continent))

# Map a numeric column (year) to the x aesthetic
ggplot(data = gapminder,
       mapping = aes(x = year))

# Same column converted to a factor: compare the x axis with the previous plot
ggplot(data = gapminder,
       mapping = aes(x = as.factor(year)))

# Two aesthetics at once: x and y
ggplot(data = gapminder,
       mapping = aes(x = year,
                     y = lifeExp))

##-- A One Aesthetic Plot ------------------------------------------------------

# Bar chart: number of rows per continent in the 2007 data
ggplot(data = filter(gapminder, year == 2007),
       mapping = aes(x = continent)) +
  geom_bar()

# Same bar chart with a fixed fill colour.
# The colour is set outside aes(), so it is a constant, not a mapping.
ggplot(data = filter(gapminder, year == 2007),
       mapping = aes(x = continent)) +
  geom_bar(fill = "#FFFACD")

# Histogram of GDP per capita in 2007, split into 50 bins
ggplot(data = filter(gapminder, year == 2007),
       mapping = aes(x = gdpPercap)) +
  geom_histogram(bins = 50)

##-- A Two Aesthetic Plot ------------------------------------------------------

# Scatter plot of life expectancy against GDP per capita (2007).
# Here the variable itself is transformed: x holds log(gdpPercap).
ggplot(data = filter(gapminder, year == 2007),
       mapping = aes(x = log(gdpPercap),
                     y = lifeExp)) +
  geom_point()


# Same relationship, but the variable is left untouched and the x *scale*
#   is made logarithmic instead. Points are drawn larger (size = 5).
ggplot(data = filter(gapminder, year == 2007),
       mapping = aes(x = gdpPercap,
                     y = lifeExp)) +
  geom_point(size = 5) +
  scale_x_log10()

##-- More Aesthetics -----------------------------------------------------------

# Bubble chart of the 2007 data with four aesthetics:
#   x = GDP per capita, y = life expectancy,
#   size = population in millions, color = continent.
gapminder %>% 
  filter(year == 2007) %>% 
  ggplot(aes(y = lifeExp,
             x = gdpPercap,
             size = pop / 1000000,
             color = continent)) + 
  geom_point() +
  # Log scale on x; gdpPercap itself is plotted unscaled
  scale_x_log10() +
  labs(x = "GDP per capita (US$, log scale)",
       y = "Life Expectancy (years)",
       color = "Continent",
       size = "Population\n(millions)") + 
  # Naming the values ties each colour to its continent explicitly instead of
  #   relying on the order of the factor levels.
  scale_color_manual(values = c(Africa   = "#b34745",
                                Americas = "#de8f44",
                                Asia     = "#A0C7BE",
                                Europe   = "#00a1d5",
                                Oceania  = "#696598")) + 
  theme_classic()

##-- Explaining Variables' Relationship ----------------------------------------

# Bubble chart of the 2007 data plus a linear trend line.
# Only x and y are mapped in ggplot(); size and color are mapped inside
#   geom_point(), so geom_smooth() does not inherit them and fits a single
#   line through all points rather than one line per continent.
gapminder %>% 
  filter(year == 2007) %>% 
  ggplot(aes(y = lifeExp,
             x = gdpPercap)) + 
  geom_point(aes(size = pop / 1000000,
                 color = continent)) +
  # Log scale on x; gdpPercap itself is plotted unscaled
  scale_x_log10() +
  labs(x = "GDP per capita (US$, log scale)",
       y = "Life Expectancy (years)",
       color = "Continent",
       size = "Population\n(millions)") + 
  # Naming the values ties each colour to its continent explicitly instead of
  #   relying on the order of the factor levels.
  scale_color_manual(values = c(Africa   = "#b34745",
                                Americas = "#de8f44",
                                Asia     = "#A0C7BE",
                                Europe   = "#00a1d5",
                                Oceania  = "#696598")) + 
  # Linear fit without the confidence band; the formula is stated explicitly
  #   so that ggplot2 does not have to report the one it picked.
  geom_smooth(method = "lm",
              formula = y ~ x,
              se = FALSE,
              color = "black") + 
  theme_bw()

