eohi/eohi3/knit/DA02_anova_DGEN.rmd

---
title: "Mixed ANOVA - Domain General Vars"
author: ""
date: "`r Sys.Date()`"
output:
  html_document:
    toc: true
    toc_float: true
    code_folding: hide
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = TRUE)
```

# Setup

```{r libraries}
library(tidyverse)
library(rstatix)
library(emmeans)
library(effectsize)
library(afex)
library(car)

options(scipen = 999)
afex::set_sum_contrasts()
```

# Data

```{r read-data}
# Data file is in parent of knit/ (eohi3/eohi3.csv)
df <- read.csv(
  "/home/ladmin/Documents/DND/EOHI/eohi3/eohi3.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = "NA"
)

between_vars <- c("perspective", "temporalDO")
within_vars <- c(
  "past_pref_DGEN", "past_pers_DGEN", "past_val_DGEN",
  "fut_pref_DGEN",  "fut_pers_DGEN",  "fut_val_DGEN"
)

missing_vars <- setdiff(c(between_vars, within_vars, "pID"), names(df))
if (length(missing_vars) > 0) {
  stop(paste("Missing required variables:", paste(missing_vars, collapse = ", ")))
}

anova_data <- df %>%
  select(pID, all_of(between_vars), all_of(within_vars)) %>%
  filter(
    !is.na(perspective), perspective != "",
    !is.na(temporalDO),  temporalDO  != ""
  )
```

# Long format

```{r long-format}
long_data <- anova_data %>%
  pivot_longer(
    cols = all_of(within_vars),
    names_to = "variable",
    values_to = "DGEN_SCORE"
  ) %>%
  mutate(
    time = case_when(
      grepl("^past_", variable) ~ "past",
      grepl("^fut_",  variable) ~ "fut",
      TRUE ~ NA_character_
    ),
    domain = case_when(
      grepl("_pref_DGEN$", variable) ~ "pref",
      grepl("_pers_DGEN$", variable) ~ "pers",
      grepl("_val_DGEN$",  variable) ~ "val",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    TIME       = factor(time,   levels = c("past", "fut")),
    DOMAIN     = factor(domain, levels = c("pref", "pers", "val")),
    perspective = factor(perspective),
    temporalDO  = factor(temporalDO),
    pID         = factor(pID)
  ) %>%
  select(pID, perspective, temporalDO, TIME, DOMAIN, DGEN_SCORE) %>%
  filter(!is.na(DGEN_SCORE))
```

# Descriptive statistics

```{r desc-stats}
desc_stats <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n        = n(),
    mean     = round(mean(DGEN_SCORE), 5),
    variance = round(var(DGEN_SCORE), 5),
    sd       = round(sd(DGEN_SCORE), 5),
    median   = round(median(DGEN_SCORE), 5),
    q1       = round(quantile(DGEN_SCORE, 0.25), 5),
    q3       = round(quantile(DGEN_SCORE, 0.75), 5),
    min      = round(min(DGEN_SCORE), 5),
    max      = round(max(DGEN_SCORE), 5),
    .groups  = "drop"
  )

# Show all rows and columns (no truncation)
options(tibble.width = Inf)
print(desc_stats, n = Inf)
```

Interpretation:
1. Mean and median values are similar w/ slightly more variation than in the domain specific anova.
2. Highest to lowest group n size ratio is 1.14 (139/122). Acceptable ratio for ANOVA (under 1.5).
3. Highest to lowest overall group variance ratio is 1.40 (9.32/6.65). Acceptable ratio for ANOVA (under 4).
   For the sake  of consistency w/ the other EHI studies, I also calculated Hartley's F-max ratio.
   The conservative F-max critical value is 1.60 (same as DS anova since number of groups and n sizes doesn't change), which is still higher than the highest observed F-max ratio of 1.28.

# Assumption checks

## Missing values

```{r missing}
missing_summary <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n_total     = n(),
    n_missing   = sum(is.na(DGEN_SCORE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups     = "drop"
  )

print(missing_summary, n = Inf)
```

No missing values. As expected.

## Outliers

```{r outliers}
outlier_summary <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n          = n(),
    n_outliers = sum(abs(scale(DGEN_SCORE)) > 3),
    .groups    = "drop"
  )

print(outlier_summary, n = Inf)
```

No outliers present. Good. Same as DS anova.

## Homogeneity of variance

```{r homogeneity}
homogeneity_between <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  rstatix::levene_test(DGEN_SCORE ~ perspective * temporalDO)

print(homogeneity_between, n = Inf)
```

Levene's test is signiicant for 1 cell only: past-val. However, variance ratios and F-max tests show that any heteroscedasticity is mild.

## Normality (within-subjects residuals)

```{r normality}
resid_within <- long_data %>%
  group_by(pID) %>%
  mutate(person_mean = mean(DGEN_SCORE, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(resid = DGEN_SCORE - person_mean) %>%
  pull(resid)
resid_within <- resid_within[!is.na(resid_within)]

n_resid <- length(resid_within)
if (n_resid < 3L) {
  message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.")
} else {
  resid_for_shapiro <- if (n_resid > 5000L) {
    set.seed(1L)
    sample(resid_within, 5000L)
  } else {
    resid_within
  }
  print(shapiro.test(resid_for_shapiro))
}
```

### Q-Q plot

```{r qqplot, fig.height = 4}
qqnorm(resid_within)
qqline(resid_within)
```

Shapiro-Wilk test is significant but is sensitive to large sample size.
QQ plot shows that strict centre residuals are normally distributed, but there is some deviation from normality.
ANOVA is robust to violations of normality w/ large sample size.

Overall, ANOVA can proceed.

# Mixed ANOVA

```{r anova}
aov_afex <- aov_ez(
  id      = "pID",
  dv      = "DGEN_SCORE",
  data    = long_data,
  between = c("perspective", "temporalDO"),
  within  = c("TIME", "DOMAIN"),
  type    = 3,
  anova_table = list(correction = "none")
)

print(aov_afex)
```

Mauchly's test of sphericity is not significant. Using uncorrected values for interpretation and analysis.


Significant main effects and interactions:
                                Effect        df   MSE        F   ges   p
4                                TIME     1, 518  3.11  8.39 **  .001   .004
8                              DOMAIN    2, 1036  2.13 7.85 ***  .001   <.001
10                  temporalDO:DOMAIN    2, 1036  2.13  5.00 ** <.001   .007
15 perspective:temporalDO:TIME:DOMAIN    2, 1036  1.52   3.12 * <.001   .045


# Mauchly and epsilon

```{r mauchly}
anova_wide <- anova_data %>%
  select(pID, perspective, temporalDO, all_of(within_vars)) %>%
  filter(if_all(all_of(within_vars), ~ !is.na(.)))
response_matrix <- as.matrix(anova_wide[, within_vars])
rm_model <- lm(response_matrix ~ perspective * temporalDO, data = anova_wide)
idata <- data.frame(
  TIME   = factor(rep(c("past", "fut"), each = 3), levels = c("past", "fut")),
  DOMAIN = factor(rep(c("pref", "pers", "val"), 2), levels = c("pref", "pers", "val"))
)
rm_anova <- car::Anova(rm_model, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
rm_summary <- summary(rm_anova, multivariate = FALSE)
if (!is.null(rm_summary$sphericity.tests)) {
  print(rm_summary$sphericity.tests)
}
if (!is.null(rm_summary$epsilon)) {
  print(rm_summary$epsilon)
}
```

# Post hoc comparisons

## TIME (main effect)

```{r posthoc-TIME}
emm_TIME <- emmeans(aov_afex, ~ TIME)
print(pairs(emm_TIME, adjust = "none"))
```

Supports presence of EHI effect.

## DOMAIN (main effect)

```{r posthoc-domain}
emm_DOMAIN <- emmeans(aov_afex, ~ DOMAIN)
print(pairs(emm_DOMAIN, adjust = "tukey"))
```

Only preference to values contrast is significant.

## temporalDO:DOMAIN

```{r posthoc-temporaldo-domain}
emmeans(aov_afex, pairwise ~ temporalDO | DOMAIN, adjust = "none")$contrasts
emmeans(aov_afex, pairwise ~ DOMAIN | temporalDO, adjust = "tukey")$contrasts
```

When grouped by domain, no contrasts are significant.


When grouped by temporalDO, some contrasts are significant:

Future-first temporal display order:
contrast    estimate     SE  df t.ratio p.value
pref - pers  0.25065 0.0892 518   2.810  0.0142


Past-first temporal display order:
contrast    estimate     SE  df t.ratio p.value
pref - val   0.33129 0.0895 518   3.702  0.0007
pers - val   0.32478 0.0921 518   3.527  0.0013

## perspective:temporalDO:TIME:DOMAIN

### contrasts for TIME grouped by perspective, temporalDO, and DOMAIN
```{r posthoc-fourway}
emm_fourway <- emmeans(aov_afex, pairwise ~ TIME | perspective * temporalDO * DOMAIN, adjust = "tukey")
print(emm_fourway$contrasts)
```

Significant contrasts:

contrast    estimate     SE  df t.ratio p.value
 past - fut   0.5285 0.179 518   2.957  0.0032 (self-perspective, personality domain, past-first temporal display order)
 past - fut   0.5366 0.187 518   2.863  0.0044 (self-perspective, values domain, past-first temporal display order)

 ### contrasts for DOMAIN grouped by perspective, TIME, and temporalDO
```{r posthoc-fourway2}
emm_fourway2 <- emmeans(aov_afex, pairwise ~ DOMAIN | perspective * TIME * temporalDO, adjust = "tukey")
print(emm_fourway2$contrasts)
```

Significant contrasts:

contrast    estimate     SE  df t.ratio p.value
 pref - val    0.6259 0.166 518   3.778  0.0005 (other-perspective, past-directed questions, past-first temporal display order)
 pers - val    0.4892 0.160 518   3.056  0.0066 (other-perspective, past-directed questions, past-first temporal display order)
 pref - val    0.4309 0.168 518   2.559  0.0290 (self-perspective, future-directed questions, past-first temporal display order)

## Cohen's d (significant contrasts only)

```{r cohens-d-significant}
d_data <- anova_data %>%
  mutate(
    past_mean = (past_pref_DGEN + past_pers_DGEN + past_val_DGEN) / 3,
    fut_mean  = (fut_pref_DGEN + fut_pers_DGEN + fut_val_DGEN) / 3,
    pref_mean = (past_pref_DGEN + fut_pref_DGEN) / 2,
    pers_mean = (past_pers_DGEN + fut_pers_DGEN) / 2,
    val_mean  = (past_val_DGEN + fut_val_DGEN) / 2
  )

cohens_d_results <- tibble(
  contrast = character(),
  condition = character(),
  d = double()
)

# TIME main: past vs fut
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "overall",
    d = suppressMessages(effectsize::cohens_d(d_data$past_mean, d_data$fut_mean, paired = TRUE)$Cohens_d)
  )

# DOMAIN main: pref vs val
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - val)",
    condition = "overall",
    d = suppressMessages(effectsize::cohens_d(d_data$pref_mean, d_data$val_mean, paired = TRUE)$Cohens_d)
  )

# temporalDO:DOMAIN - future: pref vs pers
d_fut <- d_data %>% filter(temporalDO == "future")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - pers)",
    condition = "temporalDO = future",
    d = suppressMessages(effectsize::cohens_d(d_fut$pref_mean, d_fut$pers_mean, paired = TRUE)$Cohens_d)
  )

# temporalDO:DOMAIN - past: pref vs val, pers vs val
d_past <- d_data %>% filter(temporalDO == "past")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - val)",
    condition = "temporalDO = past",
    d = suppressMessages(effectsize::cohens_d(d_past$pref_mean, d_past$val_mean, paired = TRUE)$Cohens_d)
  ) %>%
  add_row(
    contrast = "DOMAIN (pers - val)",
    condition = "temporalDO = past",
    d = suppressMessages(effectsize::cohens_d(d_past$pers_mean, d_past$val_mean, paired = TRUE)$Cohens_d)
  )

# 4-way TIME: self, past temporalDO, pers
d_self_past <- d_data %>% filter(perspective == "self", temporalDO == "past")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "self, past temporalDO, pers domain",
    d = suppressMessages(effectsize::cohens_d(d_self_past$past_pers_DGEN, d_self_past$fut_pers_DGEN, paired = TRUE)$Cohens_d)
  )

# 4-way TIME: self, past temporalDO, val
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "self, past temporalDO, val domain",
    d = suppressMessages(effectsize::cohens_d(d_self_past$past_val_DGEN, d_self_past$fut_val_DGEN, paired = TRUE)$Cohens_d)
  )

# 4-way DOMAIN: other, past TIME, past temporalDO - pref vs val
d_other_past_tpast <- d_data %>% filter(perspective == "other", temporalDO == "past")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - val)",
    condition = "other, past TIME, past temporalDO",
    d = suppressMessages(effectsize::cohens_d(d_other_past_tpast$past_pref_DGEN, d_other_past_tpast$past_val_DGEN, paired = TRUE)$Cohens_d)
  )

# 4-way DOMAIN: other, past TIME, past temporalDO - pers vs val
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pers - val)",
    condition = "other, past TIME, past temporalDO",
    d = suppressMessages(effectsize::cohens_d(d_other_past_tpast$past_pers_DGEN, d_other_past_tpast$past_val_DGEN, paired = TRUE)$Cohens_d)
  )

# 4-way DOMAIN: self, fut TIME, past temporalDO - pref vs val
d_self_fut_tpast <- d_data %>% filter(perspective == "self", temporalDO == "past")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - val)",
    condition = "self, fut TIME, past temporalDO",
    d = suppressMessages(effectsize::cohens_d(d_self_fut_tpast$fut_pref_DGEN, d_self_fut_tpast$fut_val_DGEN, paired = TRUE)$Cohens_d)
  )

cohens_d_results %>%
  mutate(d = round(d, 3)) %>%
  print(n = Inf)
```

Size	d	Interpretation
Small	0.2	Weak effect
Medium	0.5	Moderate effect
Large	0.8	Strong effect