eohi/eohi3/DA01_anova_DS.r
Irina Levit d0d0e0f8d4 Add DA00 F-max, DA01 ANOVA script, knit Rmds; ignore lock files in .gitignore (#6)
Reviewed-on: #6
Co-authored-by: Irina Levit <irina.levit.rn@gmail.com>
Co-committed-by: Irina Levit <irina.levit.rn@gmail.com>
2026-02-03 15:19:27 -05:00

236 lines
7.7 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

library(tidyverse)
library(rstatix)
library(emmeans)
library(effectsize)
library(afex)
library(car)
options(scipen = 999)
afex::set_sum_contrasts()
setwd("/home/ladmin/Documents/DND/EOHI/eohi3")
df <- read.csv(
"eohi3.csv",
stringsAsFactors = FALSE,
check.names = FALSE,
na.strings = "NA"
)
between_vars <- c("perspective", "temporalDO")
within_vars <- c(
"past_pref_MEAN", "past_pers_MEAN", "past_val_MEAN",
"fut_pref_MEAN", "fut_pers_MEAN", "fut_val_MEAN"
)
missing_vars <- setdiff(c(between_vars, within_vars, "pID"), names(df))
if (length(missing_vars) > 0) {
stop(paste("Missing required variables:", paste(missing_vars, collapse = ", ")))
}
anova_data <- df %>%
select(pID, all_of(between_vars), all_of(within_vars)) %>%
filter(
!is.na(perspective), perspective != "",
!is.na(temporalDO), temporalDO != ""
)
long_data <- anova_data %>%
pivot_longer(
cols = all_of(within_vars),
names_to = "variable",
values_to = "MEAN_SCORE"
) %>%
mutate(
time = case_when(
grepl("^past_", variable) ~ "past",
grepl("^fut_", variable) ~ "fut",
TRUE ~ NA_character_
),
domain = case_when(
grepl("_pref_MEAN$", variable) ~ "pref",
grepl("_pers_MEAN$", variable) ~ "pers",
grepl("_val_MEAN$", variable) ~ "val",
TRUE ~ NA_character_
)
) %>%
mutate(
TIME = factor(time, levels = c("past", "fut")),
DOMAIN = factor(domain, levels = c("pref", "pers", "val")),
perspective = factor(perspective),
temporalDO = factor(temporalDO),
pID = factor(pID)
) %>%
select(pID, perspective, temporalDO, TIME, DOMAIN, MEAN_SCORE) %>%
filter(!is.na(MEAN_SCORE))
desc_stats <- long_data %>%
group_by(perspective, temporalDO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_SCORE), 5),
variance = round(var(MEAN_SCORE), 5),
sd = round(sd(MEAN_SCORE), 5),
median = round(median(MEAN_SCORE), 5),
q1 = round(quantile(MEAN_SCORE, 0.25), 5),
q3 = round(quantile(MEAN_SCORE, 0.75), 5),
min = round(min(MEAN_SCORE), 5),
max = round(max(MEAN_SCORE), 5),
.groups = "drop"
)
print(desc_stats, n = Inf)
missing_summary <- long_data %>%
group_by(perspective, temporalDO, TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_SCORE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = "drop"
)
print(missing_summary, n = Inf)
outlier_summary <- long_data %>%
group_by(perspective, temporalDO, TIME, DOMAIN) %>%
summarise(
n = n(),
n_outliers = sum(abs(scale(MEAN_SCORE)) > 3),
.groups = "drop"
)
print(outlier_summary, n = Inf)
homogeneity_between <- long_data %>%
group_by(TIME, DOMAIN) %>%
rstatix::levene_test(MEAN_SCORE ~ perspective * temporalDO)
print(homogeneity_between, n = Inf)
# Normality: within-subjects residuals (deviation from each participant's mean)
resid_within <- long_data %>%
group_by(pID) %>%
mutate(person_mean = mean(MEAN_SCORE, na.rm = TRUE)) %>%
ungroup() %>%
mutate(resid = MEAN_SCORE - person_mean) %>%
pull(resid)
resid_within <- resid_within[!is.na(resid_within)]
n_resid <- length(resid_within)
if (n_resid < 3L) {
message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.")
} else {
resid_for_shapiro <- if (n_resid > 5000L) {
set.seed(1L)
sample(resid_within, 5000L)
} else {
resid_within
}
print(shapiro.test(resid_for_shapiro))
}
# qqnorm(resid_within)
# qqline(resid_within)
aov_afex <- aov_ez(
id = "pID",
dv = "MEAN_SCORE",
data = long_data,
between = c("perspective", "temporalDO"),
within = c("TIME", "DOMAIN"),
type = 3
)
# ANOVA table: uncorrected and GreenhouseGeisser
cat("\n--- ANOVA Table (Type 3, uncorrected) ---\n")
print(nice(aov_afex, correction = "none"))
cat("\n--- ANOVA Table (Type 3, GreenhouseGeisser correction) ---\n")
print(nice(aov_afex, correction = "GG"))
# Mauchly's test of sphericity and epsilon (via car::Anova on wide data)
anova_wide <- anova_data %>%
select(pID, perspective, temporalDO, all_of(within_vars)) %>%
filter(if_all(all_of(within_vars), ~ !is.na(.)))
response_matrix <- as.matrix(anova_wide[, within_vars])
rm_model <- lm(response_matrix ~ perspective * temporalDO, data = anova_wide)
idata <- data.frame(
TIME = factor(rep(c("past", "fut"), each = 3), levels = c("past", "fut")),
DOMAIN = factor(rep(c("pref", "pers", "val"), 2), levels = c("pref", "pers", "val"))
)
rm_anova <- car::Anova(rm_model, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
rm_summary <- summary(rm_anova, multivariate = FALSE)
if (!is.null(rm_summary$sphericity.tests)) {
cat("\nMauchly's Test of Sphericity:\n")
print(rm_summary$sphericity.tests)
}
if (!is.null(rm_summary$epsilon)) {
cat("\nEpsilon (GG, HF):\n")
print(rm_summary$epsilon)
}
# Within-subjects residuals: deviation from each participant's mean (one per observation)
resid_within <- long_data %>%
group_by(pID) %>%
mutate(person_mean = mean(MEAN_SCORE, na.rm = TRUE)) %>%
ungroup() %>%
mutate(resid = MEAN_SCORE - person_mean) %>%
pull(resid)
resid_within <- resid_within[!is.na(resid_within)]
# R's shapiro.test() allows 3 <= n <= 5000; use a random sample of 5000 if we have more
n_resid <- length(resid_within)
if (n_resid < 3L) {
message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.")
} else {
resid_for_shapiro <- if (n_resid > 5000L) {
set.seed(1L)
sample(resid_within, 5000L)
} else {
resid_within
}
print(shapiro.test(resid_for_shapiro))
}
# qqnorm(resid_within)
# qqline(resid_within)
# POST-HOC COMPARISONS (significant effects only)
# TIME (main effect)
emm_TIME <- emmeans(aov_afex, ~ TIME)
print(pairs(emm_TIME, adjust = "bonferroni"))
# temporalDO:TIME — ~TIME and ~temporalDO
emm_temporalDO_TIME <- emmeans(aov_afex, ~ TIME | temporalDO)
print(pairs(emm_temporalDO_TIME, adjust = "bonferroni"))
emm_temporalDO_temporalDO <- emmeans(aov_afex, ~ temporalDO | TIME)
print(pairs(emm_temporalDO_temporalDO, adjust = "bonferroni"))
# perspective:temporalDO:TIME — ~TIME, ~perspective, ~temporalDO
emm_pt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO)
print(pairs(emm_pt_TIME, adjust = "bonferroni"))
emm_pt_perspective <- emmeans(aov_afex, ~ perspective | temporalDO + TIME)
print(pairs(emm_pt_perspective, adjust = "bonferroni"))
emm_pt_temporalDO <- emmeans(aov_afex, ~ temporalDO | perspective + TIME)
print(pairs(emm_pt_temporalDO, adjust = "bonferroni"))
# perspective:DOMAIN — ~perspective and ~DOMAIN
emm_perspective_DOMAIN <- emmeans(aov_afex, ~ perspective | DOMAIN)
print(pairs(emm_perspective_DOMAIN, adjust = "bonferroni"))
emm_perspective_DOMAIN_domain <- emmeans(aov_afex, ~ DOMAIN | perspective)
print(pairs(emm_perspective_DOMAIN_domain, adjust = "bonferroni"))
# perspective:TIME:DOMAIN — ~TIME, ~perspective, ~DOMAIN
emm_pt_TIME_domain <- emmeans(aov_afex, ~ TIME | perspective + DOMAIN)
print(pairs(emm_pt_TIME_domain, adjust = "bonferroni"))
emm_pt_domain_perspective <- emmeans(aov_afex, ~ perspective | TIME + DOMAIN)
print(pairs(emm_pt_domain_perspective, adjust = "bonferroni"))
emm_pt_domain_domain <- emmeans(aov_afex, ~ DOMAIN | perspective + TIME)
print(pairs(emm_pt_domain_domain, adjust = "bonferroni"))
# perspective:temporalDO:TIME:DOMAIN — ~TIME, ~perspective, ~temporalDO
emm_ptt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO + DOMAIN)
print(pairs(emm_ptt_TIME, adjust = "bonferroni"))
emm_ptt_perspective <- emmeans(aov_afex, ~ perspective | temporalDO + TIME + DOMAIN)
print(pairs(emm_ptt_perspective, adjust = "bonferroni"))
emm_ptt_temporalDO <- emmeans(aov_afex, ~ temporalDO | perspective + TIME + DOMAIN)
print(pairs(emm_ptt_temporalDO, adjust = "bonferroni"))