--- title: "Mixed ANOVA - Domain General Vars" author: "" date: "`r Sys.Date()`" output: html_document: toc: true toc_float: true code_folding: hide --- ```{r setup, include = FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = TRUE) ``` # Setup ```{r libraries} library(tidyverse) library(rstatix) library(emmeans) library(effectsize) library(afex) library(car) options(scipen = 999) afex::set_sum_contrasts() ``` # Data ```{r read-data} # Data file is in parent of knit/ (eohi3/eohi3.csv) df <- read.csv( "/home/ladmin/Documents/DND/EOHI/eohi3/eohi3.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = "NA" ) between_vars <- c("perspective", "temporalDO") within_vars <- c( "past_pref_DGEN", "past_pers_DGEN", "past_val_DGEN", "fut_pref_DGEN", "fut_pers_DGEN", "fut_val_DGEN" ) missing_vars <- setdiff(c(between_vars, within_vars, "pID"), names(df)) if (length(missing_vars) > 0) { stop(paste("Missing required variables:", paste(missing_vars, collapse = ", "))) } anova_data <- df %>% select(pID, all_of(between_vars), all_of(within_vars)) %>% filter( !is.na(perspective), perspective != "", !is.na(temporalDO), temporalDO != "" ) ``` # Long format ```{r long-format} long_data <- anova_data %>% pivot_longer( cols = all_of(within_vars), names_to = "variable", values_to = "DGEN_SCORE" ) %>% mutate( time = case_when( grepl("^past_", variable) ~ "past", grepl("^fut_", variable) ~ "fut", TRUE ~ NA_character_ ), domain = case_when( grepl("_pref_DGEN$", variable) ~ "pref", grepl("_pers_DGEN$", variable) ~ "pers", grepl("_val_DGEN$", variable) ~ "val", TRUE ~ NA_character_ ) ) %>% mutate( TIME = factor(time, levels = c("past", "fut")), DOMAIN = factor(domain, levels = c("pref", "pers", "val")), perspective = factor(perspective), temporalDO = factor(temporalDO), pID = factor(pID) ) %>% select(pID, perspective, temporalDO, TIME, DOMAIN, DGEN_SCORE) %>% filter(!is.na(DGEN_SCORE)) ``` # Descriptive statistics ```{r desc-stats} desc_stats <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n = n(), mean = round(mean(DGEN_SCORE), 5), variance = round(var(DGEN_SCORE), 5), sd = round(sd(DGEN_SCORE), 5), median = round(median(DGEN_SCORE), 5), q1 = round(quantile(DGEN_SCORE, 0.25), 5), q3 = round(quantile(DGEN_SCORE, 0.75), 5), min = round(min(DGEN_SCORE), 5), max = round(max(DGEN_SCORE), 5), .groups = "drop" ) # Show all rows and columns (no truncation) options(tibble.width = Inf) print(desc_stats, n = Inf) ``` Interpretation: 1. Mean and median values are similar w/ slightly more variation than in the domain specific anova. 2. Highest to lowest group n size ratio is 1.14 (139/122). Acceptable ratio for ANOVA (under 1.5). 3. Highest to lowest overall group variance ratio is 1.40 (9.32/6.65). Acceptable ratio for ANOVA (under 4). For the sake of consistency w/ the other EHI studies, I also calculated Hartley's F-max ratio. The conservative F-max critical value is 1.60 (same as DS anova since number of groups and n sizes doesn't change), which is still higher than the highest observed F-max ratio of 1.28. # Assumption checks ## Missing values ```{r missing} missing_summary <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n_total = n(), n_missing = sum(is.na(DGEN_SCORE)), pct_missing = round(100 * n_missing / n_total, 2), .groups = "drop" ) print(missing_summary, n = Inf) ``` No missing values. As expected. ## Outliers ```{r outliers} outlier_summary <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n = n(), n_outliers = sum(abs(scale(DGEN_SCORE)) > 3), .groups = "drop" ) print(outlier_summary, n = Inf) ``` No outliers present. Good. Same as DS anova. ## Homogeneity of variance ```{r homogeneity} homogeneity_between <- long_data %>% group_by(TIME, DOMAIN) %>% rstatix::levene_test(DGEN_SCORE ~ perspective * temporalDO) print(homogeneity_between, n = Inf) ``` Levene's test is signiicant for 1 cell only: past-val. However, variance ratios and F-max tests show that any heteroscedasticity is mild. ## Normality (within-subjects residuals) ```{r normality} resid_within <- long_data %>% group_by(pID) %>% mutate(person_mean = mean(DGEN_SCORE, na.rm = TRUE)) %>% ungroup() %>% mutate(resid = DGEN_SCORE - person_mean) %>% pull(resid) resid_within <- resid_within[!is.na(resid_within)] n_resid <- length(resid_within) if (n_resid < 3L) { message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.") } else { resid_for_shapiro <- if (n_resid > 5000L) { set.seed(1L) sample(resid_within, 5000L) } else { resid_within } print(shapiro.test(resid_for_shapiro)) } ``` ### Q-Q plot ```{r qqplot, fig.height = 4} qqnorm(resid_within) qqline(resid_within) ``` Shapiro-Wilk test is significant but is sensitive to large sample size. QQ plot shows that strict centre residuals are normally distributed, but there is some deviation from normality. ANOVA is robust to violations of normality w/ large sample size. Overall, ANOVA can proceed. # Mixed ANOVA ```{r anova} aov_afex <- aov_ez( id = "pID", dv = "DGEN_SCORE", data = long_data, between = c("perspective", "temporalDO"), within = c("TIME", "DOMAIN"), type = 3, anova_table = list(correction = "none") ) print(aov_afex) ``` Mauchly's test of sphericity is not significant. Using uncorrected values for interpretation and analysis. Significant main effects and interactions: Effect df MSE F ges p 4 TIME 1, 518 3.11 8.39 ** .001 .004 8 DOMAIN 2, 1036 2.13 7.85 *** .001 <.001 10 temporalDO:DOMAIN 2, 1036 2.13 5.00 ** <.001 .007 15 perspective:temporalDO:TIME:DOMAIN 2, 1036 1.52 3.12 * <.001 .045 # Mauchly and epsilon ```{r mauchly} anova_wide <- anova_data %>% select(pID, perspective, temporalDO, all_of(within_vars)) %>% filter(if_all(all_of(within_vars), ~ !is.na(.))) response_matrix <- as.matrix(anova_wide[, within_vars]) rm_model <- lm(response_matrix ~ perspective * temporalDO, data = anova_wide) idata <- data.frame( TIME = factor(rep(c("past", "fut"), each = 3), levels = c("past", "fut")), DOMAIN = factor(rep(c("pref", "pers", "val"), 2), levels = c("pref", "pers", "val")) ) rm_anova <- car::Anova(rm_model, idata = idata, idesign = ~ TIME * DOMAIN, type = 3) rm_summary <- summary(rm_anova, multivariate = FALSE) if (!is.null(rm_summary$sphericity.tests)) { print(rm_summary$sphericity.tests) } if (!is.null(rm_summary$epsilon)) { print(rm_summary$epsilon) } ``` # Post hoc comparisons ## TIME (main effect) ```{r posthoc-TIME} emm_TIME <- emmeans(aov_afex, ~ TIME) print(pairs(emm_TIME, adjust = "none")) ``` Supports presence of EHI effect. ## DOMAIN (main effect) ```{r posthoc-domain} emm_DOMAIN <- emmeans(aov_afex, ~ DOMAIN) print(pairs(emm_DOMAIN, adjust = "tukey")) ``` Only preference to values contrast is significant. ## temporalDO:DOMAIN ```{r posthoc-temporaldo-domain} emmeans(aov_afex, pairwise ~ temporalDO | DOMAIN, adjust = "none")$contrasts emmeans(aov_afex, pairwise ~ DOMAIN | temporalDO, adjust = "tukey")$contrasts ``` When grouped by domain, no contrasts are significant. When grouped by temporalDO, some contrasts are significant: Future-first temporal display order: contrast estimate SE df t.ratio p.value pref - pers 0.25065 0.0892 518 2.810 0.0142 Past-first temporal display order: contrast estimate SE df t.ratio p.value pref - val 0.33129 0.0895 518 3.702 0.0007 pers - val 0.32478 0.0921 518 3.527 0.0013 ## perspective:temporalDO:TIME:DOMAIN ### contrasts for TIME grouped by perspective, temporalDO, and DOMAIN ```{r posthoc-fourway} emm_fourway <- emmeans(aov_afex, pairwise ~ TIME | perspective * temporalDO * DOMAIN, adjust = "tukey") print(emm_fourway$contrasts) ``` Significant contrasts: contrast estimate SE df t.ratio p.value past - fut 0.5285 0.179 518 2.957 0.0032 (self-perspective, personality domain, past-first temporal display order) past - fut 0.5366 0.187 518 2.863 0.0044 (self-perspective, values domain, past-first temporal display order) ### contrasts for DOMAIN grouped by perspective, TIME, and temporalDO ```{r posthoc-fourway2} emm_fourway2 <- emmeans(aov_afex, pairwise ~ DOMAIN | perspective * TIME * temporalDO, adjust = "tukey") print(emm_fourway2$contrasts) ``` Significant contrasts: contrast estimate SE df t.ratio p.value pref - val 0.6259 0.166 518 3.778 0.0005 (other-perspective, past-directed questions, past-first temporal display order) pers - val 0.4892 0.160 518 3.056 0.0066 (other-perspective, past-directed questions, past-first temporal display order) pref - val 0.4309 0.168 518 2.559 0.0290 (self-perspective, future-directed questions, past-first temporal display order) ## Cohen's d (significant contrasts only) ```{r cohens-d-significant} d_data <- anova_data %>% mutate( past_mean = (past_pref_DGEN + past_pers_DGEN + past_val_DGEN) / 3, fut_mean = (fut_pref_DGEN + fut_pers_DGEN + fut_val_DGEN) / 3, pref_mean = (past_pref_DGEN + fut_pref_DGEN) / 2, pers_mean = (past_pers_DGEN + fut_pers_DGEN) / 2, val_mean = (past_val_DGEN + fut_val_DGEN) / 2 ) cohens_d_results <- tibble( contrast = character(), condition = character(), d = double() ) # TIME main: past vs fut cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "overall", d = suppressMessages(effectsize::cohens_d(d_data$past_mean, d_data$fut_mean, paired = TRUE)$Cohens_d) ) # DOMAIN main: pref vs val cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - val)", condition = "overall", d = suppressMessages(effectsize::cohens_d(d_data$pref_mean, d_data$val_mean, paired = TRUE)$Cohens_d) ) # temporalDO:DOMAIN - future: pref vs pers d_fut <- d_data %>% filter(temporalDO == "future") cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - pers)", condition = "temporalDO = future", d = suppressMessages(effectsize::cohens_d(d_fut$pref_mean, d_fut$pers_mean, paired = TRUE)$Cohens_d) ) # temporalDO:DOMAIN - past: pref vs val, pers vs val d_past <- d_data %>% filter(temporalDO == "past") cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - val)", condition = "temporalDO = past", d = suppressMessages(effectsize::cohens_d(d_past$pref_mean, d_past$val_mean, paired = TRUE)$Cohens_d) ) %>% add_row( contrast = "DOMAIN (pers - val)", condition = "temporalDO = past", d = suppressMessages(effectsize::cohens_d(d_past$pers_mean, d_past$val_mean, paired = TRUE)$Cohens_d) ) # 4-way TIME: self, past temporalDO, pers d_self_past <- d_data %>% filter(perspective == "self", temporalDO == "past") cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "self, past temporalDO, pers domain", d = suppressMessages(effectsize::cohens_d(d_self_past$past_pers_DGEN, d_self_past$fut_pers_DGEN, paired = TRUE)$Cohens_d) ) # 4-way TIME: self, past temporalDO, val cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "self, past temporalDO, val domain", d = suppressMessages(effectsize::cohens_d(d_self_past$past_val_DGEN, d_self_past$fut_val_DGEN, paired = TRUE)$Cohens_d) ) # 4-way DOMAIN: other, past TIME, past temporalDO - pref vs val d_other_past_tpast <- d_data %>% filter(perspective == "other", temporalDO == "past") cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - val)", condition = "other, past TIME, past temporalDO", d = suppressMessages(effectsize::cohens_d(d_other_past_tpast$past_pref_DGEN, d_other_past_tpast$past_val_DGEN, paired = TRUE)$Cohens_d) ) # 4-way DOMAIN: other, past TIME, past temporalDO - pers vs val cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pers - val)", condition = "other, past TIME, past temporalDO", d = suppressMessages(effectsize::cohens_d(d_other_past_tpast$past_pers_DGEN, d_other_past_tpast$past_val_DGEN, paired = TRUE)$Cohens_d) ) # 4-way DOMAIN: self, fut TIME, past temporalDO - pref vs val d_self_fut_tpast <- d_data %>% filter(perspective == "self", temporalDO == "past") cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - val)", condition = "self, fut TIME, past temporalDO", d = suppressMessages(effectsize::cohens_d(d_self_fut_tpast$fut_pref_DGEN, d_self_fut_tpast$fut_val_DGEN, paired = TRUE)$Cohens_d) ) cohens_d_results %>% mutate(d = round(d, 3)) %>% print(n = Inf) ``` Size d Interpretation Small 0.2 Weak effect Medium 0.5 Moderate effect Large 0.8 Strong effect