library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.4     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
library(shiny)
library(rmarkdown)
library(dagitty)
library(ggdag)
## 
## Attaching package: 'ggdag'
## The following object is masked from 'package:stats':
## 
##     filter
library(broom)
library(flextable)
## 
## Attaching package: 'flextable'
## The following object is masked from 'package:purrr':
## 
##     compose
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following objects are masked from 'package:flextable':
## 
##     border, font, rotate
library(nlme)
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car
## 
## Attaching package: 'lme4'
## The following object is masked from 'package:nlme':
## 
##     lmList
library(mice)
## 
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
## 
##     filter
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(naniar)
# Read in data and convert from wide to long
full_data <- read_csv("../Data/Cross-sec_full.csv", na=c(".", "", " "))
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_double(),
##   SSM_ASD_v24 = col_character(),
##   `V24 demographics,Risk` = col_character(),
##   GROUP = col_character(),
##   Study_Site = col_character(),
##   Gender = col_character()
## )
## i Use `spec()` for the full column specifications.
names(full_data) <- gsub(" |,",".",names(full_data))

Example: Mullen composite and Visit

When running a mixed model in R, the data must be long form. That is, each observation for a subject must be a separate row in your data. The R code below converts the dataset to long form (this is the same code as was used in the long form example in Chapter 4).

mixed_model_data <- full_data %>%
  select(Identifiers, GROUP, Gender,
         V36.mullen.composite_standard_score:V12.mullen.composite_standard_score)

vars_to_convert <- names(mixed_model_data)[c(-1,-2,-3)]

mixed_model_data <- mixed_model_data %>%
  gather(variable, var_value, vars_to_convert) %>%
  separate(variable,c("Visit","Variable"),sep=3) %>% 
  spread(key=Variable, value=var_value) %>%
  plyr::rename(c(".mullen.composite_standard_score"="Mullen_Composite_Score")) %>%
  mutate(ASD_Diag = factor(ifelse(grepl("ASD", GROUP), "ASD_Neg", "ASD_Pos")),
         Visit=factor(Visit),
         Visit_Num=as.numeric(Visit)-1,
         Mullen_Composite_Score=as.numeric(Mullen_Composite_Score)) %>%
  arrange(Identifiers, Visit)
## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(vars_to_convert)` instead of `vars_to_convert` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

Now, let us fit model Mullen composite score as the outcome and visit number (1st, 2nd, 3rd, or 4th) and ASD diagnosis (positive or negative) as covariates. Note that in order for the intercept in the model to be interpretable, we code visit so that 0 reflects the first visit, 1 reflects the second visit, etc. We fit the following random intercept-only model:

\(Mullen_{ij}=\beta_{0}+\beta_{1}Visit_{ij}+\beta_{2}Group_{ij}+\phi_{i}+\epsilon_{ij}\)

where \(\phi_i\) are independent across index \(i\) and \(\epsilon_{ij}\) are independent across \(i\).

To fit this model in R, we can either use the lme4 package or the nlme package. The nlme package is more flexible, so it is covered here. The function used from this package is called lme(), and it works similarly to the lm().

# Fitt mixed effects model
mixed_fit <-
  lme(Mullen_Composite_Score~Visit_Num + ASD_Diag, 
    data=mixed_model_data,
    random = ~1|Identifiers,
    na.action = na.omit)
# Print out fixed effects results and other useful results
summary(mixed_fit)
## Linear mixed-effects model fit by REML
##  Data: mixed_model_data 
##        AIC      BIC    logLik
##   15192.52 15220.14 -7591.262
## 
## Random effects:
##  Formula: ~1 | Identifiers
##         (Intercept) Residual
## StdDev:    7.442339 13.07822
## 
## Fixed effects: Mullen_Composite_Score ~ Visit_Num + ASD_Diag 
##                    Value Std.Error   DF  t-value p-value
## (Intercept)     86.41244 1.1490190 1264 75.20541   0e+00
## Visit_Num        1.14286 0.3058513 1264  3.73667   2e-04
## ASD_DiagASD_Pos 15.21819 1.1648815  585 13.06415   0e+00
##  Correlation: 
##                 (Intr) Vst_Nm
## Visit_Num       -0.382       
## ASD_DiagASD_Pos -0.856  0.036
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -3.103657307 -0.570984554 -0.004197724  0.595802347  3.191924869 
## 
## Number of Observations: 1852
## Number of Groups: 587
# Print out F-tests
anova.lme(mixed_fit)
##             numDF denDF  F-value p-value
## (Intercept)     1  1264 53343.75  <.0001
## Visit_Num       1  1264    10.69  0.0011
## ASD_Diag        1   585   170.67  <.0001
# Print out the random effect variance-covariance matrix
getVarCov(mixed_fit)
## Random effects variance covariance matrix
##             (Intercept)
## (Intercept)      55.388
##   Standard Deviations: 7.4423
# Print out the residual variance-covariance matrix
getVarCov(mixed_fit, type="conditional", individuals="1")
## Identifiers 1 
## Conditional variance covariance matrix
##        1      2      3      4
## 1 171.04   0.00   0.00   0.00
## 2   0.00 171.04   0.00   0.00
## 3   0.00   0.00 171.04   0.00
## 4   0.00   0.00   0.00 171.04
##   Standard Deviations: 13.078 13.078 13.078 13.078
# Print out boxplot of random intercepts, byt diagnosis group
# First get dataset used in mixed effect model (i.e. removing missing values)
mixed_effect_data <- getData(mixed_fit)

# Add random intercepts using predict()
mixed_effect_data <- data.frame(mixed_effect_data %>%
                                  select(Identifiers, GROUP) %>%
                                  distinct(.),
                                "random_ints"=mixed_fit$coefficients$random$Identifiers)

# Now plot
ggplot(data=mixed_effect_data,
       mapping=aes(x=GROUP, y=`X.Intercept.`, fill=GROUP))+
  geom_boxplot()

mixed_effect_data_complete <- getData(mixed_fit)

# Add predicted values using predict function
mixed_effect_data_complete <- data.frame(mixed_effect_data_complete, predict(mixed_fit))

ggplot(data=mixed_effect_data_complete, mapping=aes(x=Visit, 
                                          y=predict.mixed_fit.,
                                          color=ASD_Diag,
                                          group=Identifiers))+
  geom_point()+
  geom_line()+
  labs(y="Fitted Value: Mullen Composite", 
       title="Mixed model fitted values without interaction term")

Note that no interaction terms between visit and ASD diagnosis were included It may make sense to include an interaction term between these variables; this would imply that the change in Mullen composite score over time is different between the ASD diagnosis groups. That model would be the following:

\(Mullen_{ij}=\beta_{0}+\beta_{1}Visit_{ij}+\beta_{2}Group_{ij}+\beta_{3}Group_{ij}*Visit_{ij}+\phi_{i}+\epsilon_{ij}\) where \(\phi_i\) are independent across index \(i\) and \(\epsilon_{ij}\) are independent across \(i\).

# Fit model
mixed_fit_interact <-
  lme(Mullen_Composite_Score~Visit_Num + ASD_Diag + Visit_Num*ASD_Diag, 
    data=mixed_model_data,
    random = ~1|Identifiers,
    na.action = na.omit)

# Print out results
summary(mixed_fit_interact)
## Linear mixed-effects model fit by REML
##  Data: mixed_model_data 
##        AIC      BIC    logLik
##   15084.22 15117.35 -7536.108
## 
## Random effects:
##  Formula: ~1 | Identifiers
##         (Intercept) Residual
## StdDev:    7.851045 12.49571
## 
## Fixed effects: Mullen_Composite_Score ~ Visit_Num + ASD_Diag + Visit_Num * ASD_Diag 
##                              Value Std.Error   DF  t-value p-value
## (Intercept)               95.75193 1.4442168 1263 66.30025  0.0000
## Visit_Num                 -5.35939 0.6757939 1263 -7.93051  0.0000
## ASD_DiagASD_Pos            3.89798 1.5796181  585  2.46767  0.0139
## Visit_Num:ASD_DiagASD_Pos  8.02574 0.7498930 1263 10.70251  0.0000
##  Correlation: 
##                           (Intr) Vst_Nm ASD_DA
## Visit_Num                 -0.672              
## ASD_DiagASD_Pos           -0.914  0.614       
## Visit_Num:ASD_DiagASD_Pos  0.605 -0.901 -0.669
## 
## Standardized Within-Group Residuals:
##           Min            Q1           Med            Q3           Max 
## -3.3559289829 -0.5953731509  0.0003434236  0.5877083391  3.1766641043 
## 
## Number of Observations: 1852
## Number of Groups: 587
# Create plot
mixed_effect_data_complete <- getData(mixed_fit_interact)
mixed_effect_data_complete <- data.frame(mixed_effect_data_complete, predict(mixed_fit_interact))

ggplot(data=mixed_effect_data_complete, mapping=aes(x=Visit, 
                                          y=predict.mixed_fit_interact.,
                                          color=ASD_Diag,
                                          group=Identifiers))+
  geom_point()+
  geom_line()+
  labs(y="Fitted Value: Mullen Composite", 
       title="Mixed model fitted values with interaction term")

Finally, we consider also adding in a random slope for age, to reflect subject-specific age trajectories:

\(Mullen_{ij}=\beta_{0}+\beta_{1}Visit_{ij}+\beta_{2}Group_{ij}+\beta_{3}Group_{ij}*Visit_{ij}+\phi_{0,i}+\phi_{1,i}Visit_{ij}+\epsilon_{ij}\) where \(\phi_i\) are independent across index \(i\) and \(\epsilon_{ij}\) are independent across \(i\).

## Fit model
mixed_fit_interact_rand_slope <-
  lme(Mullen_Composite_Score~Visit_Num + ASD_Diag + Visit_Num*ASD_Diag, 
    data=mixed_model_data,
    random = ~1+Visit_Num|Identifiers,
    na.action = na.omit)

## Print out resultsmixed_effect_data_complete$
summary(mixed_fit_interact_rand_slope)
## Linear mixed-effects model fit by REML
##  Data: mixed_model_data 
##        AIC      BIC    logLik
##   14924.86 14969.04 -7454.431
## 
## Random effects:
##  Formula: ~1 + Visit_Num | Identifiers
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev    Corr  
## (Intercept)  6.256243 (Intr)
## Visit_Num    5.483750 -0.188
## Residual    10.694466       
## 
## Fixed effects: Mullen_Composite_Score ~ Visit_Num + ASD_Diag + Visit_Num * ASD_Diag 
##                              Value Std.Error   DF  t-value p-value
## (Intercept)               95.88271 1.2259417 1263 78.21148  0.0000
## Visit_Num                 -5.50422 0.8306790 1263 -6.62616  0.0000
## ASD_DiagASD_Pos            3.79570 1.3408254  585  2.83087  0.0048
## Visit_Num:ASD_DiagASD_Pos  8.24438 0.9162889 1263  8.99758  0.0000
##  Correlation: 
##                           (Intr) Vst_Nm ASD_DA
## Visit_Num                 -0.573              
## ASD_DiagASD_Pos           -0.914  0.523       
## Visit_Num:ASD_DiagASD_Pos  0.519 -0.907 -0.574
## 
## Standardized Within-Group Residuals:
##          Min           Q1          Med           Q3          Max 
## -2.828231905 -0.557034757  0.008772279  0.551159793  3.156926661 
## 
## Number of Observations: 1852
## Number of Groups: 587
## Create plot
mixed_effect_data_complete <- getData(mixed_fit_interact_rand_slope)
mixed_effect_data_complete <- 
  data.frame(mixed_effect_data_complete, "predicted_value"=predict(mixed_fit_interact_rand_slope))
# Notice above, we create variable "predicted_value" to hold each subject's fitted values, including random effect realizations, in dataset called mixed_effect_data_complete ("complete" meaning all subjects with missing data for variables in model are removed from dataset to be used in plot)

ggplot(data=mixed_effect_data_complete, mapping=aes(x=Visit, 
                                          y=predicted_value,
                                          color=ASD_Diag,
                                          group=Identifiers))+
  geom_point()+
  geom_line()+
  labs(y="Fitted Value: Mullen Composite", 
       title="Mixed model fitted values with interaction term\nand random slope for visit")