eohi/eohi3/knit/DA01_anova_DS.Rmd

---
title: "Mixed ANOVA - Domain Specific Means (DA01)"
author: ""
date: "`r Sys.Date()`"
output:
  html_document:
    toc: true
    toc_float: true
    code_folding: hide
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = TRUE)
```

# Setup

```{r libraries}
library(tidyverse)
library(rstatix)
library(emmeans)
library(effectsize)
library(afex)
library(car)

options(scipen = 999)
afex::set_sum_contrasts()
```

# Data

```{r read-data}
# Data file is in parent of knit/ (eohi3/eohi3.csv)
df <- read.csv(
  "/home/ladmin/Documents/DND/EOHI/eohi3/eohi3.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = "NA"
)

between_vars <- c("perspective", "temporalDO")
within_vars <- c(
  "past_pref_MEAN", "past_pers_MEAN", "past_val_MEAN",
  "fut_pref_MEAN",  "fut_pers_MEAN",  "fut_val_MEAN"
)

missing_vars <- setdiff(c(between_vars, within_vars, "pID"), names(df))
if (length(missing_vars) > 0) {
  stop(paste("Missing required variables:", paste(missing_vars, collapse = ", ")))
}

anova_data <- df %>%
  select(pID, all_of(between_vars), all_of(within_vars)) %>%
  filter(
    !is.na(perspective), perspective != "",
    !is.na(temporalDO),  temporalDO  != ""
  )
```

# Long format

```{r long-format}
long_data <- anova_data %>%
  pivot_longer(
    cols = all_of(within_vars),
    names_to = "variable",
    values_to = "MEAN_SCORE"
  ) %>%
  mutate(
    time = case_when(
      grepl("^past_", variable) ~ "past",
      grepl("^fut_",  variable) ~ "fut",
      TRUE ~ NA_character_
    ),
    domain = case_when(
      grepl("_pref_MEAN$", variable) ~ "pref",
      grepl("_pers_MEAN$", variable) ~ "pers",
      grepl("_val_MEAN$",  variable) ~ "val",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    TIME       = factor(time,   levels = c("past", "fut")),
    DOMAIN     = factor(domain, levels = c("pref", "pers", "val")),
    perspective = factor(perspective),
    temporalDO  = factor(temporalDO),
    pID         = factor(pID)
  ) %>%
  select(pID, perspective, temporalDO, TIME, DOMAIN, MEAN_SCORE) %>%
  filter(!is.na(MEAN_SCORE))
```

# Descriptive statistics

```{r desc-stats}
desc_stats <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n        = n(),
    mean     = round(mean(MEAN_SCORE), 5),
    variance = round(var(MEAN_SCORE), 5),
    sd       = round(sd(MEAN_SCORE), 5),
    median   = round(median(MEAN_SCORE), 5),
    q1       = round(quantile(MEAN_SCORE, 0.25), 5),
    q3       = round(quantile(MEAN_SCORE, 0.75), 5),
    min      = round(min(MEAN_SCORE), 5),
    max      = round(max(MEAN_SCORE), 5),
    .groups  = "drop"
  )

# Show all rows and columns (no truncation)
options(tibble.width = Inf)
print(desc_stats, n = Inf)
```

Interpretations:
1. Mean and median values are similar, indicating distribution is relatively symmetric and any skew is minimal. Any outliers are not extreme.
2. Highest to lowest group n size ratio is 1.14 (139/122). Acceptable ratio for ANOVA (under 1.5).
3. Highest to lowest overall group variance ratio is 1.67 (7.93/4.74). Acceptable ratio for ANOVA (under 4).
   For the sake  of consistency w/ the other EHI studies, I also calculated Hartley's F-max ratio.
   The conservative F-max critical value is 1.60, which is still higher than the highest observed F-max ratio of 1.53.

# Assumption checks

## Missing values

```{r missing}
missing_summary <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n_total     = n(),
    n_missing   = sum(is.na(MEAN_SCORE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups     = "drop"
  )

print(missing_summary, n = Inf)
```

No missing values. As expected.

## Outliers

```{r outliers}
outlier_summary <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n          = n(),
    n_outliers = sum(abs(scale(MEAN_SCORE)) > 3),
    .groups    = "drop"
  )

print(outlier_summary, n = Inf)
```

No outliers present. Good.

## Homogeneity of variance

```{r homogeneity}
homogeneity_between <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  rstatix::levene_test(MEAN_SCORE ~ perspective * temporalDO)

print(homogeneity_between, n = Inf)
```

Levene's test is sigifnicant for two cells: fut-pers and fut-val.
However, variance ratios and F-max tests show that any heteroscedasticity is mild.

## Normality (within-subjects residuals)

```{r normality}
resid_within <- long_data %>%
  group_by(pID) %>%
  mutate(person_mean = mean(MEAN_SCORE, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(resid = MEAN_SCORE - person_mean) %>%
  pull(resid)
resid_within <- resid_within[!is.na(resid_within)]

n_resid <- length(resid_within)
if (n_resid < 3L) {
  message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.")
} else {
  resid_for_shapiro <- if (n_resid > 5000L) {
    set.seed(1L)
    sample(resid_within, 5000L)
  } else {
    resid_within
  }
  print(shapiro.test(resid_for_shapiro))
}
```

### Q-Q plot

```{r qqplot, fig.height = 4}
qqnorm(resid_within)
qqline(resid_within)
```

Shapiro-Wilk is significant but is sensitive to large sample size.
QQ plot shows that centre residuals are normally distributed, with some tail heaviness.
ANOVA is robust to violations of normality w/ large sample size.

Overall, ANOVA can proceed.

# Mixed ANOVA

```{r anova}
aov_afex <- aov_ez(
  id      = "pID",
  dv      = "MEAN_SCORE",
  data    = long_data,
  between = c("perspective", "temporalDO"),
  within  = c("TIME", "DOMAIN"),
  type    = 3
)

cat("\n--- ANOVA Table (Type 3, uncorrected) ---\n")
print(nice(aov_afex, correction = "none"))
cat("\n--- ANOVA Table (Type 3, Greenhouse–Geisser correction) ---\n")
print(nice(aov_afex, correction = "GG"))
```

Mauchly's test of sphericity is sig for DOMAIN main effect and interaction (except w/ TIME). Use GG correction for these:
- 8                              DOMAIN 1.94, 1004.66  1.21     0.63 <.001, p = .529
## 9                  perspective:DOMAIN 1.94, 1004.66  1.21 7.79 *** <.001, p <.001
## 10                  temporalDO:DOMAIN 1.94, 1004.66  1.21     0.76 <.001, p = .466
## 11      perspective:temporalDO:DOMAIN 1.94, 1004.66  1.21     0.17 <.001, p = .837


The following are significant main effects and interactions:
## 15 perspective:temporalDO:TIME:DOMAIN 2, 1036  0.75   3.11 * <.001    .045
## 13            perspective:TIME:DOMAIN 2, 1036  0.75   3.58 * <.001    .028
## 9                  perspective:DOMAIN 1.94, 1004.66  1.21 7.79 *** <.001, p <.001 (GG corrected)
## 6                     temporalDO:TIME  1, 518  1.86  9.81 ** <.001    .002
## 7         perspective:temporalDO:TIME  1, 518  1.86  7.91 ** <.001    .005
## 4                                TIME  1, 518  1.86 10.05 ** <.001    .002


# Mauchly and epsilon

```{r mauchly}
anova_wide <- anova_data %>%
  select(pID, perspective, temporalDO, all_of(within_vars)) %>%
  filter(if_all(all_of(within_vars), ~ !is.na(.)))
response_matrix <- as.matrix(anova_wide[, within_vars])
rm_model <- lm(response_matrix ~ perspective * temporalDO, data = anova_wide)
idata <- data.frame(
  TIME   = factor(rep(c("past", "fut"), each = 3), levels = c("past", "fut")),
  DOMAIN = factor(rep(c("pref", "pers", "val"), 2), levels = c("pref", "pers", "val"))
)
rm_anova <- car::Anova(rm_model, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
rm_summary <- summary(rm_anova, multivariate = FALSE)
if (!is.null(rm_summary$sphericity.tests)) {
  cat("\nMauchly's Test of Sphericity:\n")
  print(rm_summary$sphericity.tests)
}
if (!is.null(rm_summary$epsilon)) {
  cat("\nEpsilon (GG, HF):\n")
  print(rm_summary$epsilon)
}
```

# Post-hoc comparisons

## TIME (main effect)

```{r posthoc-TIME}
emm_TIME <- emmeans(aov_afex, ~ TIME)
print(pairs(emm_TIME, adjust = "bonferroni"))
```

Pairwise comparison provide supprot for EHI effect.

## temporalDO:TIME

```{r posthoc-temporalDO-TIME}
emm_temporalDO_TIME <- emmeans(aov_afex, ~ TIME | temporalDO)
print(pairs(emm_temporalDO_TIME, adjust = "bonferroni"))
```

Contrast significant only for temporal display order of past first, then future.
When grouped by time instead of temporalDO, no contrasts are significant.

## perspective:temporalDO:TIME

```{r posthoc-pt-TIME}
emm_pt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO)
print(pairs(emm_pt_TIME, adjust = "bonferroni"))
```

EHI is significant only for self perspective and past first temporal display order.

When grouped by perspective or temporalDO instead of TIME, no contrasts are significant.

## perspective:DOMAIN

```{r posthoc-perspective-DOMAIN}
emm_perspective_DOMAIN <- emmeans(aov_afex, ~ perspective | DOMAIN)
print(pairs(emm_perspective_DOMAIN, adjust = "bonferroni"))
emm_perspective_DOMAIN_domain <- emmeans(aov_afex, ~ DOMAIN | perspective)
print(pairs(emm_perspective_DOMAIN_domain, adjust = "bonferroni"))
```

significance is driven by the change from preferences to values in the "other" perspective.

## perspective:TIME:DOMAIN

```{r posthoc-pt-DOMAIN}
emm_pt_TIME_domain <- emmeans(aov_afex, ~ TIME | perspective + DOMAIN)
print(pairs(emm_pt_TIME_domain, adjust = "bonferroni"))
```

EHI effects present for other-perspective in the preferences domain and for self-perspective in the values domain.
Estimate is higher in the self-perspective than in the other-perspective.

```{r posthoc-pt-DOMAIN-domain}
emm_pt_domain_domain <- emmeans(aov_afex, ~ DOMAIN | perspective + TIME)
print(pairs(emm_pt_domain_domain, adjust = "bonferroni"))
```

Significant contrasts are driven by domain changes from preferences to values in the self vs other perspectives, in the past-oriented questions.
Trends reverse depending on perspective, where values have higher estimates than preferences in the self-perspective, but lower estimates than preferences in the other-perspective.

## perspective:temporalDO:TIME:DOMAIN

```{r posthoc-ptt-TIME}
emm_ptt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO + DOMAIN)
print(pairs(emm_ptt_TIME, adjust = "bonferroni"))
```
EHI effects are present for three contrasts:
- past - fut   0.2806 0.118 518   2.380  0.0177 (other-perspective, preferences domain, past-first temporal display order)
- past - fut   0.4358 0.138 518   3.156  0.0017 (self-perspective, personality domain, past-first temporal display order)
- past - fut   0.7276 0.141 518   5.169 <0.0001 (self-perspective, values domain, past-first temporal display order)

A reverse EHI effect is present for 1 contrast:
- past - fut  -0.2367 0.118 518  -2.001  0.0459 (self-personality, preferences domain, future-first temporal display order)

```{r posthoc-ptt-perspective}
emm_ptt_perspective <- emmeans(aov_afex, ~ perspective | temporalDO + TIME + DOMAIN)
print(pairs(emm_ptt_perspective, adjust = "bonferroni"))
```
1 significant contrast:
- other - self  -0.6972 0.314 518  -2.220  0.0268 (values domain, past-oriented questions, past-first temporal display order)


not really of theoretical interest but speaks to the perspective:TIME:DOMAIN interaction.

no significant contrasts when grouped by temporalDO instead of TIME or perspective.

## Cohen's d (significant contrasts only)

```{r cohens-d-significant}
d_data <- anova_data %>%
  mutate(
    past_mean = (past_pref_MEAN + past_pers_MEAN + past_val_MEAN) / 3,
    fut_mean  = (fut_pref_MEAN + fut_pers_MEAN + fut_val_MEAN) / 3,
    pref_mean = (past_pref_MEAN + fut_pref_MEAN) / 2,
    pers_mean = (past_pers_MEAN + fut_pers_MEAN) / 2,
    val_mean  = (past_val_MEAN + fut_val_MEAN) / 2
  )

cohens_d_results <- tibble(
  contrast = character(),
  condition = character(),
  d = double()
)

# TIME main: past vs fut
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "overall",
    d = suppressMessages(effectsize::cohens_d(d_data$past_mean, d_data$fut_mean, paired = TRUE)$Cohens_d)
  )

# temporalDO:TIME: past vs fut for temporalDO = past
d_past_tdo <- d_data %>% filter(temporalDO == "past")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "temporalDO = past",
    d = suppressMessages(effectsize::cohens_d(d_past_tdo$past_mean, d_past_tdo$fut_mean, paired = TRUE)$Cohens_d)
  )

# perspective:temporalDO:TIME: past vs fut for self, past temporalDO
d_self_past <- d_data %>% filter(perspective == "self", temporalDO == "past")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "self, temporalDO = past",
    d = suppressMessages(effectsize::cohens_d(d_self_past$past_mean, d_self_past$fut_mean, paired = TRUE)$Cohens_d)
  )

# perspective:DOMAIN: pref vs val for perspective = other
d_other <- d_data %>% filter(perspective == "other")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - val)",
    condition = "perspective = other",
    d = suppressMessages(effectsize::cohens_d(d_other$pref_mean, d_other$val_mean, paired = TRUE)$Cohens_d)
  )

# perspective:TIME:DOMAIN - TIME: other, pref
d_other_pref <- d_data %>% filter(perspective == "other")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "other, pref domain",
    d = suppressMessages(effectsize::cohens_d(d_other_pref$past_pref_MEAN, d_other_pref$fut_pref_MEAN, paired = TRUE)$Cohens_d)
  )

# perspective:TIME:DOMAIN - TIME: self, val
d_self_val <- d_data %>% filter(perspective == "self")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "self, val domain",
    d = suppressMessages(effectsize::cohens_d(d_self_val$past_val_MEAN, d_self_val$fut_val_MEAN, paired = TRUE)$Cohens_d)
  )

# perspective:TIME:DOMAIN - DOMAIN: other, past TIME
d_other_past <- d_data %>% filter(perspective == "other")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - val)",
    condition = "other, past TIME",
    d = suppressMessages(effectsize::cohens_d(d_other_past$past_pref_MEAN, d_other_past$past_val_MEAN, paired = TRUE)$Cohens_d)
  )

# perspective:TIME:DOMAIN - DOMAIN: self, past TIME: pref - pers
d_self_past_t <- d_data %>% filter(perspective == "self")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - pers)",
    condition = "self, past TIME",
    d = suppressMessages(effectsize::cohens_d(d_self_past_t$past_pref_MEAN, d_self_past_t$past_pers_MEAN, paired = TRUE)$Cohens_d)
  )

# perspective:TIME:DOMAIN - DOMAIN: self, past TIME: pref - val
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "DOMAIN (pref - val)",
    condition = "self, past TIME",
    d = suppressMessages(effectsize::cohens_d(d_self_past_t$past_pref_MEAN, d_self_past_t$past_val_MEAN, paired = TRUE)$Cohens_d)
  )

# 4-way TIME: self, future temporalDO, pref (reverse EHI)
d_self_fut_pref <- d_data %>% filter(perspective == "self", temporalDO == "future")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut) [reverse]",
    condition = "self, future temporalDO, pref domain",
    d = suppressMessages(effectsize::cohens_d(d_self_fut_pref$past_pref_MEAN, d_self_fut_pref$fut_pref_MEAN, paired = TRUE)$Cohens_d)
  )

# 4-way TIME: other, past temporalDO, pref
d_other_past_pref <- d_data %>% filter(perspective == "other", temporalDO == "past")
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "other, past temporalDO, pref domain",
    d = suppressMessages(effectsize::cohens_d(d_other_past_pref$past_pref_MEAN, d_other_past_pref$fut_pref_MEAN, paired = TRUE)$Cohens_d)
  )

# 4-way TIME: self, past temporalDO, pers
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "self, past temporalDO, pers domain",
    d = suppressMessages(effectsize::cohens_d(d_self_past$past_pers_MEAN, d_self_past$fut_pers_MEAN, paired = TRUE)$Cohens_d)
  )

# 4-way TIME: self, past temporalDO, val
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "TIME (past - fut)",
    condition = "self, past temporalDO, val domain",
    d = suppressMessages(effectsize::cohens_d(d_self_past$past_val_MEAN, d_self_past$fut_val_MEAN, paired = TRUE)$Cohens_d)
  )

# 4-way perspective: past temporalDO, past TIME, val domain (other - self, between-subjects)
d_ptt_val <- d_data %>%
  filter(temporalDO == "past") %>%
  select(perspective, past_val_MEAN)
d_other_ptt <- d_ptt_val %>% filter(perspective == "other") %>% pull(past_val_MEAN)
d_self_ptt <- d_ptt_val %>% filter(perspective == "self") %>% pull(past_val_MEAN)
cohens_d_results <- cohens_d_results %>%
  add_row(
    contrast = "perspective (other - self)",
    condition = "past temporalDO, past TIME, val domain",
    d = suppressMessages(effectsize::cohens_d(d_other_ptt, d_self_ptt, paired = FALSE)$Cohens_d)
  )

cohens_d_results %>%
  mutate(d = round(d, 3)) %>%
  print(n = Inf)
```