library(tidyverse)
library(rstatix)
library(emmeans)
library(effectsize)
library(afex)
library(car)

options(scipen = 999)
afex::set_sum_contrasts()

setwd("/home/ladmin/Documents/DND/EOHI/eohi3")

df <- read.csv(
  "eohi3.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = "NA"
)

between_vars <- c("perspective", "temporalDO")
within_vars <- c(
  "past_pref_MEAN", "past_pers_MEAN", "past_val_MEAN",
  "fut_pref_MEAN",  "fut_pers_MEAN",  "fut_val_MEAN"
)

missing_vars <- setdiff(c(between_vars, within_vars, "pID"), names(df))
if (length(missing_vars) > 0) {
  stop(paste("Missing required variables:", paste(missing_vars, collapse = ", ")))
}

anova_data <- df %>%
  select(pID, all_of(between_vars), all_of(within_vars)) %>%
  filter(
    !is.na(perspective), perspective != "",
    !is.na(temporalDO),  temporalDO  != ""
  )

long_data <- anova_data %>%
  pivot_longer(
    cols = all_of(within_vars),
    names_to = "variable",
    values_to = "MEAN_SCORE"
  ) %>%
  mutate(
    time = case_when(
      grepl("^past_", variable) ~ "past",
      grepl("^fut_",  variable) ~ "fut",
      TRUE ~ NA_character_
    ),
    domain = case_when(
      grepl("_pref_MEAN$", variable) ~ "pref",
      grepl("_pers_MEAN$", variable) ~ "pers",
      grepl("_val_MEAN$",  variable) ~ "val",
      TRUE ~ NA_character_
    )
  ) %>%
  mutate(
    TIME       = factor(time,   levels = c("past", "fut")),
    DOMAIN     = factor(domain, levels = c("pref", "pers", "val")),
    perspective = factor(perspective),
    temporalDO  = factor(temporalDO),
    pID         = factor(pID)
  ) %>%
  select(pID, perspective, temporalDO, TIME, DOMAIN, MEAN_SCORE) %>%
  filter(!is.na(MEAN_SCORE))

desc_stats <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n        = n(),
    mean     = round(mean(MEAN_SCORE), 5),
    variance = round(var(MEAN_SCORE), 5),
    sd       = round(sd(MEAN_SCORE), 5),
    median   = round(median(MEAN_SCORE), 5),
    q1       = round(quantile(MEAN_SCORE, 0.25), 5),
    q3       = round(quantile(MEAN_SCORE, 0.75), 5),
    min      = round(min(MEAN_SCORE), 5),
    max      = round(max(MEAN_SCORE), 5),
    .groups  = "drop"
  )

print(desc_stats, n = Inf)

missing_summary <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n_total     = n(),
    n_missing   = sum(is.na(MEAN_SCORE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups     = "drop"
  )

print(missing_summary, n = Inf)

outlier_summary <- long_data %>%
  group_by(perspective, temporalDO, TIME, DOMAIN) %>%
  summarise(
    n          = n(),
    n_outliers = sum(abs(scale(MEAN_SCORE)) > 3),
    .groups    = "drop"
  )

print(outlier_summary, n = Inf)

homogeneity_between <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  rstatix::levene_test(MEAN_SCORE ~ perspective * temporalDO)

print(homogeneity_between, n = Inf)

# Normality: within-subjects residuals (deviation from each participant's mean)
resid_within <- long_data %>%
  group_by(pID) %>%
  mutate(person_mean = mean(MEAN_SCORE, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(resid = MEAN_SCORE - person_mean) %>%
  pull(resid)
resid_within <- resid_within[!is.na(resid_within)]
n_resid <- length(resid_within)
if (n_resid < 3L) {
  message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.")
} else {
  resid_for_shapiro <- if (n_resid > 5000L) {
    set.seed(1L)
    sample(resid_within, 5000L)
  } else {
    resid_within
  }
  print(shapiro.test(resid_for_shapiro))
}
# qqnorm(resid_within)
# qqline(resid_within)

aov_afex <- aov_ez(
  id      = "pID",
  dv      = "MEAN_SCORE",
  data    = long_data,
  between = c("perspective", "temporalDO"),
  within  = c("TIME", "DOMAIN"),
  type    = 3
)

# ANOVA table: uncorrected and Greenhouse–Geisser
cat("\n--- ANOVA Table (Type 3, uncorrected) ---\n")
print(nice(aov_afex, correction = "none"))
cat("\n--- ANOVA Table (Type 3, Greenhouse–Geisser correction) ---\n")
print(nice(aov_afex, correction = "GG"))

# Mauchly's test of sphericity and epsilon (via car::Anova on wide data)
anova_wide <- anova_data %>%
  select(pID, perspective, temporalDO, all_of(within_vars)) %>%
  filter(if_all(all_of(within_vars), ~ !is.na(.)))
response_matrix <- as.matrix(anova_wide[, within_vars])
rm_model <- lm(response_matrix ~ perspective * temporalDO, data = anova_wide)
idata <- data.frame(
  TIME   = factor(rep(c("past", "fut"), each = 3), levels = c("past", "fut")),
  DOMAIN = factor(rep(c("pref", "pers", "val"), 2), levels = c("pref", "pers", "val"))
)
rm_anova <- car::Anova(rm_model, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
rm_summary <- summary(rm_anova, multivariate = FALSE)
if (!is.null(rm_summary$sphericity.tests)) {
  cat("\nMauchly's Test of Sphericity:\n")
  print(rm_summary$sphericity.tests)
}
if (!is.null(rm_summary$epsilon)) {
  cat("\nEpsilon (GG, HF):\n")
  print(rm_summary$epsilon)
}

# Within-subjects residuals: deviation from each participant's mean (one per observation)
resid_within <- long_data %>%
  group_by(pID) %>%
  mutate(person_mean = mean(MEAN_SCORE, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(resid = MEAN_SCORE - person_mean) %>%
  pull(resid)
resid_within <- resid_within[!is.na(resid_within)]
# R's shapiro.test() allows 3 <= n <= 5000; use a random sample of 5000 if we have more
n_resid <- length(resid_within)
if (n_resid < 3L) {
  message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.")
} else {
  resid_for_shapiro <- if (n_resid > 5000L) {
    set.seed(1L)
    sample(resid_within, 5000L)
  } else {
    resid_within
  }
  print(shapiro.test(resid_for_shapiro))
}

# qqnorm(resid_within)
# qqline(resid_within)

# POST-HOC COMPARISONS (significant effects only)

# TIME (main effect)
emm_TIME <- emmeans(aov_afex, ~ TIME)
print(pairs(emm_TIME, adjust = "bonferroni"))

# temporalDO:TIME — ~TIME and ~temporalDO
emm_temporalDO_TIME <- emmeans(aov_afex, ~ TIME | temporalDO)
print(pairs(emm_temporalDO_TIME, adjust = "bonferroni"))
emm_temporalDO_temporalDO <- emmeans(aov_afex, ~ temporalDO | TIME)
print(pairs(emm_temporalDO_temporalDO, adjust = "bonferroni"))

# perspective:temporalDO:TIME — ~TIME, ~perspective, ~temporalDO
emm_pt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO)
print(pairs(emm_pt_TIME, adjust = "bonferroni"))
emm_pt_perspective <- emmeans(aov_afex, ~ perspective | temporalDO + TIME)
print(pairs(emm_pt_perspective, adjust = "bonferroni"))
emm_pt_temporalDO <- emmeans(aov_afex, ~ temporalDO | perspective + TIME)
print(pairs(emm_pt_temporalDO, adjust = "bonferroni"))

# perspective:DOMAIN — ~perspective and ~DOMAIN
emm_perspective_DOMAIN <- emmeans(aov_afex, ~ perspective | DOMAIN)
print(pairs(emm_perspective_DOMAIN, adjust = "bonferroni"))
emm_perspective_DOMAIN_domain <- emmeans(aov_afex, ~ DOMAIN | perspective)
print(pairs(emm_perspective_DOMAIN_domain, adjust = "bonferroni"))

# perspective:TIME:DOMAIN — ~TIME, ~perspective, ~DOMAIN
emm_pt_TIME_domain <- emmeans(aov_afex, ~ TIME | perspective + DOMAIN)
print(pairs(emm_pt_TIME_domain, adjust = "bonferroni"))
emm_pt_domain_perspective <- emmeans(aov_afex, ~ perspective | TIME + DOMAIN)
print(pairs(emm_pt_domain_perspective, adjust = "bonferroni"))
emm_pt_domain_domain <- emmeans(aov_afex, ~ DOMAIN | perspective + TIME)
print(pairs(emm_pt_domain_domain, adjust = "bonferroni"))

# perspective:temporalDO:TIME:DOMAIN — ~TIME, ~perspective, ~temporalDO
emm_ptt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO + DOMAIN)
print(pairs(emm_ptt_TIME, adjust = "bonferroni"))
emm_ptt_perspective <- emmeans(aov_afex, ~ perspective | temporalDO + TIME + DOMAIN)
print(pairs(emm_ptt_perspective, adjust = "bonferroni"))
emm_ptt_temporalDO <- emmeans(aov_afex, ~ temporalDO | perspective + TIME + DOMAIN)
print(pairs(emm_ptt_temporalDO, adjust = "bonferroni"))