
# page 7
# Simulate a noisy sine wave on [0, 2 * pi] in steps of 0.1; the Gaussian
# noise has mean 0 and standard deviation sd(sin_x / 2).
x <- seq(from = 0, to = pi * 2, by = 0.1)
sin_x <- sin(x)
y <- sin_x + rnorm(length(x), mean = 0, sd = sd(sin_x / 2))
sample_data <- data.frame(y = y, x = x)

#install.packages("ggplot2")
library(ggplot2)
# Scatter plot of the simulated data.
ggplot(data = sample_data, mapping = aes(x = x, y = y)) +
  geom_point()

# page 8
# Fit a straight line to the simulated data and overlay the linear smoother
# on the scatter plot.
lm_y <- lm(formula = y ~ x, data = sample_data)
ggplot(data = sample_data, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth(method = "lm")

# page 9
# Model summary, followed by the first lm diagnostic plot (which = 1).
summary(lm_y)
plot(lm_y, which = 1)


# page 11

#install.packages("ggplot2")
#install.packages("mgcv")
#install.packages("itsadug")

library(ggplot2)
library(mgcv)
library(itsadug)



## DATA 1: GAM_TUTORIAL.CSV
###############################

#p. 15

# Read the tutorial data (gam_tutorial.csv is looked up relative to the
# working directory) and preview the first rows.
df <- read.csv(file = "gam_tutorial.csv")
head(df)

# Keep only the rows where Gender equals 2 (rows with a missing Gender are
# dropped, as subset() would do).
df2 <- df[which(df$Gender == 2), , drop = FALSE]

# Linear fit of VOT on F0, estimated with gam() without any smooth term.
linear_model <- gam(formula = VOT ~ F0, data = df2)
summary(linear_model)

# Observed points with the fitted values of the linear model drawn in red.
data_plot <- ggplot(df2, aes(x = F0, y = VOT)) +
  geom_point() +
  geom_line(
    aes(y = fitted(linear_model)),
    colour = "red",
    linewidth = 1.2
  ) +
  theme_bw()
data_plot

# Same response and predictor as the linear fit, but F0 now enters through a
# smooth term s(F0).
gam_model <- gam(VOT ~ s(F0), data = df2)
summary(gam_model)

# Overlay the fitted values of the smooth model in blue. Line thickness is set
# with `linewidth` (the `size` aesthetic is deprecated for lines since
# ggplot2 3.4.0), matching the red line of the linear fit.
data_plot <- data_plot +
  geom_line(aes(y = fitted(gam_model)),
            colour = "blue", linewidth = 1.2)
data_plot

# Default mgcv plot of the estimated smooth s(F0).
plot(gam_model)

# p. 16
# Refit the linear and the smooth model and compare them by AIC.
linear_model <- gam(VOT ~ F0, data = df2)
smooth_model <- gam(VOT ~ s(F0), data = df2)
AIC(linear_model, smooth_model)

# p24

# DATA 2: gam_data
#########################


# Simulated example data from mgcv (see ?gamSim for what eg = 5 generates).
gam_data <- gamSim(eg = 5)

# p. 25
head(gam_data)

# p. 26
# x0 enters parametrically (outside s()); x1 enters as a smooth.
basic_model <- gam(formula = y ~ x0 + s(x1), data = gam_data)
basic_summary <- summary(basic_model)
basic_summary[["p.table"]]  # table for the parametric terms
basic_summary[["s.table"]]  # table for the smooth terms

# p. 28
# Add x2 as a further parametric term.
two_term_model <- gam(formula = y ~ x0 + s(x1) + x2, data = gam_data)
two_term_summary <- summary(two_term_model)
two_term_summary[["p.table"]]
two_term_summary[["s.table"]]

# p. 29
# Let x2 enter as a smooth instead.
two_smooth_model <- gam(formula = y ~ x0 + s(x1) + s(x2), data = gam_data)
two_smooth_summary <- summary(two_smooth_model)
two_smooth_summary[["p.table"]]
two_smooth_summary[["s.table"]]

# p. 30
# Compare the three candidate models by AIC.
AIC(basic_model, two_term_model, two_smooth_model)


###
# p. 33
head(df)
# Gender is converted to a factor so that it is treated as categorical, both
# as a parametric term and as the `by` variable of a smooth.
df$Gender <- as.factor(df$Gender)

# Every model in this section is fitted to the full data set `df`.
# `df2` holds a single Gender value (and was subset before Gender became a
# factor), so Gender effects cannot be estimated from it, and AIC values are
# only comparable between models fitted to the same observations.
basic_model <- gam(VOT ~ Gender + s(F0), data = df, method = "REML")
basic_summary <- summary(basic_model)

basic_summary$p.table

basic_summary$s.table

# Add VDUR as a parametric (linear) term.
two_term_model <- gam(VOT ~ Gender + s(F0) + VDUR,
                      data = df, method = "REML")
two_term_summary <- summary(two_term_model)

two_term_summary$p.table

two_term_summary$s.table

# Let VDUR enter as a smooth instead.
two_smooth_model <- gam(VOT ~ Gender + s(F0) + s(VDUR),
                        data = df, method = "REML")
two_smooth_summary <- summary(two_smooth_model)

two_smooth_summary$p.table

two_smooth_summary$s.table


AIC(basic_model, two_term_model, two_smooth_model)

# factor_interaction
# s(F0, by = Gender) fits a separate smooth of F0 for each level of Gender.
factor_interact <- gam(VOT ~ Gender +
                         s(F0, by = Gender) +
                         s(VDUR),
                       data = df, method = "REML")

summary(factor_interact)$s.table


# p. 34
vis.gam(factor_interact, theta = 120, n.grid = 50, lwd = .4)

AIC(two_smooth_model, factor_interact)

# p. 35
# s(F0, VDUR) is a single two-dimensional smooth of both covariates.
smooth_interact <- gam(VOT ~ Gender + s(F0, VDUR),
                       data = df, method = "REML")
summary(smooth_interact)$s.table


# p. 36
vis.gam(smooth_interact,
        view = c("F0", "VDUR"),
        theta = 50, n.grid = 50, lwd = .4)

AIC(two_smooth_model, smooth_interact)


## DATA 3: gam_data2
##################################################

# generate and view data (see ?gamSim for what eg = 6 generates)
gam_data2 <- gamSim(eg = 6)
head(gam_data2)

# run random intercept model: smooth of x0 plus a random effect ("re") of fac
gamm_intercept <- gam(y ~ s(x0) + s(fac, bs = "re"),
                      data = gam_data2, method = "REML")

# examine model output
summary(gamm_intercept)$s.table

# Two panels side by side, as for the other random-effect models in this
# script.
par(mfrow = c(1, 2), cex = 1.1)

# Plot the summed effect of x0 (without random effects).
# The title names the smooth actually in the model, s(x0).
plot_smooth(gamm_intercept, view = "x0", rm.ranef = TRUE,
            main = "intercept + s(x0)")

# Plot each level of the random effect: the first call opens the panel,
# the following calls add one curve per remaining level of fac.
plot_smooth(gamm_intercept, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "1"),
            main = "... + s(fac)", col = "orange", ylim = c(0, 25))
plot_smooth(gamm_intercept, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "2"),
            add = TRUE, col = "red")
plot_smooth(gamm_intercept, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "3"),
            add = TRUE, col = "purple")
plot_smooth(gamm_intercept, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "4"),
            add = TRUE, col = "turquoise")

# Random slope model: smooth of x0 plus a random effect ("re") on x0 by fac.
gamm_slope <- gam(y ~ s(x0) + s(x0, fac, bs = "re"),
                  data = gam_data2, method = "REML")

summary(gamm_slope)$s.table

par(mfrow = c(1, 2), cex = 1.1)

# Plot the summed effect of x0 (without random effects).
# The title names the smooth actually in the model, s(x0).
plot_smooth(gamm_slope, view = "x0", rm.ranef = TRUE,
            main = "intercept + s(x0)")

# Plot each level of the random effect: the first call opens the panel,
# the following calls add one curve per remaining level of fac.
plot_smooth(gamm_slope, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "1"),
            main = "... + s(fac, x0)", col = "orange", ylim = c(0, 25))
plot_smooth(gamm_slope, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "2"),
            add = TRUE, col = "red")
plot_smooth(gamm_slope, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "3"),
            add = TRUE, col = "purple")
plot_smooth(gamm_slope, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "4"),
            add = TRUE, col = "turquoise")

# Random intercept and random slope model: smooth of x0, a random effect of
# fac, and a random effect on x0 by fac.
gamm_int_slope <- gam(y ~ s(x0) + s(fac, bs = "re") + s(fac, x0, bs = "re"),
                      data = gam_data2, method = "REML")

summary(gamm_int_slope)$s.table


par(mfrow = c(1, 2), cex = 1.1)
# Plot the summed effect of x0 (without random effects).
# The title names the smooth actually in the model, s(x0).
plot_smooth(gamm_int_slope, view = "x0", rm.ranef = TRUE,
            main = "intercept + s(x0)")

# Plot each level of the random effect: the first call opens the panel,
# the following calls add one curve per remaining level of fac.
plot_smooth(gamm_int_slope, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "1"),
            main = "... + s(fac) + s(fac, x0)", col = "orange",
            ylim = c(0, 25))
plot_smooth(gamm_int_slope, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "2"),
            add = TRUE, col = "red")
plot_smooth(gamm_int_slope, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "3"),
            add = TRUE, col = "purple")
plot_smooth(gamm_int_slope, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "4"),
            add = TRUE, col = "turquoise")

plot(gamm_int_slope, select = 3)
# select = 3 because the random slope, s(fac, x0), is the third smooth term
# in the model formula and therefore the third entry in the summary table.

# Factor-smooth model: smooth of x0 plus a factor-smooth interaction
# (bs = "fs") of x0 by fac.
gamm_smooth <- gam(y ~ s(x0) + s(x0, fac, bs = "fs", m = 1),
                   data = gam_data2, method = "REML")

summary(gamm_smooth)$s.table

plot(gamm_smooth, select = 1)
# NOTE(review): select = 1 draws the first smooth term of the formula, s(x0).
# The factor smooth s(x0, fac) is the second term (select = 2) - confirm which
# of the two plots is intended here.


par(mfrow = c(1, 2), cex = 1.1)

# Plot the summed effect of x0 (without random effects).
# The title names the smooth actually in the model, s(x0).
plot_smooth(gamm_smooth, view = "x0", rm.ranef = TRUE,
            main = "intercept + s(x0)")

# Plot each level of the random effect: the first call opens the panel,
# the following calls add one curve per remaining level of fac.
# The title names the random term of this model, s(x0, fac).
plot_smooth(gamm_smooth, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "1"),
            main = "... + s(x0, fac)", col = "orange", ylim = c(0, 25))
plot_smooth(gamm_smooth, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "2"),
            add = TRUE, col = "red")
plot_smooth(gamm_smooth, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "3"),
            add = TRUE, col = "purple")
plot_smooth(gamm_smooth, view = "x0", rm.ranef = FALSE,
            cond = list(fac = "4"),
            add = TRUE, col = "turquoise")

# Compare the four random-effect structures by AIC.
AIC(gamm_intercept, gamm_slope, gamm_int_slope, gamm_smooth)

### 
#### Nottingham temperature time series
# Load the nottem monthly temperature series.
data("nottem")

# Number of years covered by the monthly series (20 years).
n_years <- length(nottem) / 12

# Month index 1 to 12, repeated once for every year in the series.
nottem_month <- rep(seq_len(12), times = n_years)

# Year of each monthly observation, starting in 1920 (each year repeated for
# its 12 months).
nottem_year <- rep(1920:(1919 + n_years), each = 12)

# Plot the time series: one line per year, month on the x axis.
# Built with ggplot() because qplot() is deprecated since ggplot2 3.4.0.
# The series is stored as plain numbers so ggplot2 can pick a continuous
# scale, and the labels reproduce the ones qplot() derived from the
# expressions.
nottem_plot_data <- data.frame(
  month = nottem_month,
  temperature = as.numeric(nottem),
  year = factor(nottem_year)
)
ggplot(nottem_plot_data,
       aes(x = month, y = temperature, colour = year)) +
  geom_line() +
  labs(x = "nottem_month", y = "nottem", colour = "factor(nottem_year)") +
  theme_bw()

# Smooth of year plus a cyclic cubic ("cc") smooth of month, fitted by REML.
year_gam <- gam(
  formula = nottem ~ s(nottem_year) + s(nottem_month, bs = "cc"),
  method = "REML"
)
summary(year_gam)[["s.table"]]



## time-series - year
# Autocorrelation and partial autocorrelation of the model residuals,
# side by side, up to lag 36.
par(mfrow = c(1, 2))
acf(residuals(year_gam), lag.max = 36, main = "ACF")
pacf(residuals(year_gam), lag.max = 36, main = "pACF")

# Collect the series and its year/month indices in one data frame for gamm().
# NOTE: this reuses the names `df` and `year_gam`, replacing the objects
# created under those names earlier in the script.
df <- data.frame(nottem, nottem_year, nottem_month)

# Baseline: same smooth structure as before, fitted with gamm() and no
# correlation structure.
year_gam <- gamm(
  formula = nottem ~ s(nottem_year) + s(nottem_month, bs = "cc"),
  data = df
)

# Same model with AR(1) errors, correlation grouped within each year.
year_gam_AR1 <- gamm(
  formula = nottem ~ s(nottem_year) + s(nottem_month, bs = "cc"),
  data = df,
  correlation = corARMA(form = ~ 1 | nottem_year, p = 1)
)

# Same model with AR(2) errors, correlation grouped within each year.
year_gam_AR2 <- gamm(
  formula = nottem ~ s(nottem_year) + s(nottem_month, bs = "cc"),
  data = df,
  correlation = corARMA(form = ~ 1 | nottem_year, p = 2)
)

# Compare the three error structures through the lme component of each fit.
AIC(year_gam$lme, year_gam_AR1$lme, year_gam_AR2$lme)





