library(tidyverse) library(rstatix) library(emmeans) library(effectsize) library(afex) library(car) options(scipen = 999) afex::set_sum_contrasts() setwd("/home/ladmin/Documents/DND/EOHI/eohi3") df <- read.csv( "eohi3.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = "NA" ) between_vars <- c("perspective", "temporalDO") within_vars <- c( "past_pref_MEAN", "past_pers_MEAN", "past_val_MEAN", "fut_pref_MEAN", "fut_pers_MEAN", "fut_val_MEAN" ) missing_vars <- setdiff(c(between_vars, within_vars, "pID"), names(df)) if (length(missing_vars) > 0) { stop(paste("Missing required variables:", paste(missing_vars, collapse = ", "))) } anova_data <- df %>% select(pID, all_of(between_vars), all_of(within_vars)) %>% filter( !is.na(perspective), perspective != "", !is.na(temporalDO), temporalDO != "" ) long_data <- anova_data %>% pivot_longer( cols = all_of(within_vars), names_to = "variable", values_to = "MEAN_SCORE" ) %>% mutate( time = case_when( grepl("^past_", variable) ~ "past", grepl("^fut_", variable) ~ "fut", TRUE ~ NA_character_ ), domain = case_when( grepl("_pref_MEAN$", variable) ~ "pref", grepl("_pers_MEAN$", variable) ~ "pers", grepl("_val_MEAN$", variable) ~ "val", TRUE ~ NA_character_ ) ) %>% mutate( TIME = factor(time, levels = c("past", "fut")), DOMAIN = factor(domain, levels = c("pref", "pers", "val")), perspective = factor(perspective), temporalDO = factor(temporalDO), pID = factor(pID) ) %>% select(pID, perspective, temporalDO, TIME, DOMAIN, MEAN_SCORE) %>% filter(!is.na(MEAN_SCORE)) desc_stats <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n = n(), mean = round(mean(MEAN_SCORE), 5), variance = round(var(MEAN_SCORE), 5), sd = round(sd(MEAN_SCORE), 5), median = round(median(MEAN_SCORE), 5), q1 = round(quantile(MEAN_SCORE, 0.25), 5), q3 = round(quantile(MEAN_SCORE, 0.75), 5), min = round(min(MEAN_SCORE), 5), max = round(max(MEAN_SCORE), 5), .groups = "drop" ) print(desc_stats, n = Inf) missing_summary <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n_total = n(), n_missing = sum(is.na(MEAN_SCORE)), pct_missing = round(100 * n_missing / n_total, 2), .groups = "drop" ) print(missing_summary, n = Inf) outlier_summary <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n = n(), n_outliers = sum(abs(scale(MEAN_SCORE)) > 3), .groups = "drop" ) print(outlier_summary, n = Inf) homogeneity_between <- long_data %>% group_by(TIME, DOMAIN) %>% rstatix::levene_test(MEAN_SCORE ~ perspective * temporalDO) print(homogeneity_between, n = Inf) # Normality: within-subjects residuals (deviation from each participant's mean) resid_within <- long_data %>% group_by(pID) %>% mutate(person_mean = mean(MEAN_SCORE, na.rm = TRUE)) %>% ungroup() %>% mutate(resid = MEAN_SCORE - person_mean) %>% pull(resid) resid_within <- resid_within[!is.na(resid_within)] n_resid <- length(resid_within) if (n_resid < 3L) { message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.") } else { resid_for_shapiro <- if (n_resid > 5000L) { set.seed(1L) sample(resid_within, 5000L) } else { resid_within } print(shapiro.test(resid_for_shapiro)) } # qqnorm(resid_within) # qqline(resid_within) aov_afex <- aov_ez( id = "pID", dv = "MEAN_SCORE", data = long_data, between = c("perspective", "temporalDO"), within = c("TIME", "DOMAIN"), type = 3 ) # ANOVA table: uncorrected and Greenhouse–Geisser cat("\n--- ANOVA Table (Type 3, uncorrected) ---\n") print(nice(aov_afex, correction = "none")) cat("\n--- ANOVA Table (Type 3, Greenhouse–Geisser correction) ---\n") print(nice(aov_afex, correction = "GG")) # Mauchly's test of sphericity and epsilon (via car::Anova on wide data) anova_wide <- anova_data %>% select(pID, perspective, temporalDO, all_of(within_vars)) %>% filter(if_all(all_of(within_vars), ~ !is.na(.))) response_matrix <- as.matrix(anova_wide[, within_vars]) rm_model <- lm(response_matrix ~ perspective * temporalDO, data = anova_wide) idata <- data.frame( TIME = factor(rep(c("past", "fut"), each = 3), levels = c("past", "fut")), DOMAIN = factor(rep(c("pref", "pers", "val"), 2), levels = c("pref", "pers", "val")) ) rm_anova <- car::Anova(rm_model, idata = idata, idesign = ~ TIME * DOMAIN, type = 3) rm_summary <- summary(rm_anova, multivariate = FALSE) if (!is.null(rm_summary$sphericity.tests)) { cat("\nMauchly's Test of Sphericity:\n") print(rm_summary$sphericity.tests) } if (!is.null(rm_summary$epsilon)) { cat("\nEpsilon (GG, HF):\n") print(rm_summary$epsilon) } # Within-subjects residuals: deviation from each participant's mean (one per observation) resid_within <- long_data %>% group_by(pID) %>% mutate(person_mean = mean(MEAN_SCORE, na.rm = TRUE)) %>% ungroup() %>% mutate(resid = MEAN_SCORE - person_mean) %>% pull(resid) resid_within <- resid_within[!is.na(resid_within)] # R's shapiro.test() allows 3 <= n <= 5000; use a random sample of 5000 if we have more n_resid <- length(resid_within) if (n_resid < 3L) { message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.") } else { resid_for_shapiro <- if (n_resid > 5000L) { set.seed(1L) sample(resid_within, 5000L) } else { resid_within } print(shapiro.test(resid_for_shapiro)) } # qqnorm(resid_within) # qqline(resid_within) # POST-HOC COMPARISONS (significant effects only) # TIME (main effect) emm_TIME <- emmeans(aov_afex, ~ TIME) print(pairs(emm_TIME, adjust = "bonferroni")) # temporalDO:TIME — ~TIME and ~temporalDO emm_temporalDO_TIME <- emmeans(aov_afex, ~ TIME | temporalDO) print(pairs(emm_temporalDO_TIME, adjust = "bonferroni")) emm_temporalDO_temporalDO <- emmeans(aov_afex, ~ temporalDO | TIME) print(pairs(emm_temporalDO_temporalDO, adjust = "bonferroni")) # perspective:temporalDO:TIME — ~TIME, ~perspective, ~temporalDO emm_pt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO) print(pairs(emm_pt_TIME, adjust = "bonferroni")) emm_pt_perspective <- emmeans(aov_afex, ~ perspective | temporalDO + TIME) print(pairs(emm_pt_perspective, adjust = "bonferroni")) emm_pt_temporalDO <- emmeans(aov_afex, ~ temporalDO | perspective + TIME) print(pairs(emm_pt_temporalDO, adjust = "bonferroni")) # perspective:DOMAIN — ~perspective and ~DOMAIN emm_perspective_DOMAIN <- emmeans(aov_afex, ~ perspective | DOMAIN) print(pairs(emm_perspective_DOMAIN, adjust = "bonferroni")) emm_perspective_DOMAIN_domain <- emmeans(aov_afex, ~ DOMAIN | perspective) print(pairs(emm_perspective_DOMAIN_domain, adjust = "bonferroni")) # perspective:TIME:DOMAIN — ~TIME, ~perspective, ~DOMAIN emm_pt_TIME_domain <- emmeans(aov_afex, ~ TIME | perspective + DOMAIN) print(pairs(emm_pt_TIME_domain, adjust = "bonferroni")) emm_pt_domain_perspective <- emmeans(aov_afex, ~ perspective | TIME + DOMAIN) print(pairs(emm_pt_domain_perspective, adjust = "bonferroni")) emm_pt_domain_domain <- emmeans(aov_afex, ~ DOMAIN | perspective + TIME) print(pairs(emm_pt_domain_domain, adjust = "bonferroni")) # perspective:temporalDO:TIME:DOMAIN — ~TIME, ~perspective, ~temporalDO emm_ptt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO + DOMAIN) print(pairs(emm_ptt_TIME, adjust = "bonferroni")) emm_ptt_perspective <- emmeans(aov_afex, ~ perspective | temporalDO + TIME + DOMAIN) print(pairs(emm_ptt_perspective, adjust = "bonferroni")) emm_ptt_temporalDO <- emmeans(aov_afex, ~ temporalDO | perspective + TIME + DOMAIN) print(pairs(emm_ptt_temporalDO, adjust = "bonferroni"))