--- title: "Mixed ANOVA - Domain Specific Means (DA01)" author: "" date: "`r Sys.Date()`" output: html_document: toc: true toc_float: true code_folding: hide --- ```{r setup, include = FALSE} knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = TRUE) ``` # Setup ```{r libraries} library(tidyverse) library(rstatix) library(emmeans) library(effectsize) library(afex) library(car) options(scipen = 999) afex::set_sum_contrasts() ``` # Data ```{r read-data} # Data file is in parent of knit/ (eohi3/eohi3.csv) df <- read.csv( "/home/ladmin/Documents/DND/EOHI/eohi3/eohi3.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = "NA" ) between_vars <- c("perspective", "temporalDO") within_vars <- c( "past_pref_MEAN", "past_pers_MEAN", "past_val_MEAN", "fut_pref_MEAN", "fut_pers_MEAN", "fut_val_MEAN" ) missing_vars <- setdiff(c(between_vars, within_vars, "pID"), names(df)) if (length(missing_vars) > 0) { stop(paste("Missing required variables:", paste(missing_vars, collapse = ", "))) } anova_data <- df %>% select(pID, all_of(between_vars), all_of(within_vars)) %>% filter( !is.na(perspective), perspective != "", !is.na(temporalDO), temporalDO != "" ) ``` # Long format ```{r long-format} long_data <- anova_data %>% pivot_longer( cols = all_of(within_vars), names_to = "variable", values_to = "MEAN_SCORE" ) %>% mutate( time = case_when( grepl("^past_", variable) ~ "past", grepl("^fut_", variable) ~ "fut", TRUE ~ NA_character_ ), domain = case_when( grepl("_pref_MEAN$", variable) ~ "pref", grepl("_pers_MEAN$", variable) ~ "pers", grepl("_val_MEAN$", variable) ~ "val", TRUE ~ NA_character_ ) ) %>% mutate( TIME = factor(time, levels = c("past", "fut")), DOMAIN = factor(domain, levels = c("pref", "pers", "val")), perspective = factor(perspective), temporalDO = factor(temporalDO), pID = factor(pID) ) %>% select(pID, perspective, temporalDO, TIME, DOMAIN, MEAN_SCORE) %>% filter(!is.na(MEAN_SCORE)) ``` # Descriptive statistics ```{r desc-stats} desc_stats <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n = n(), mean = round(mean(MEAN_SCORE), 5), variance = round(var(MEAN_SCORE), 5), sd = round(sd(MEAN_SCORE), 5), median = round(median(MEAN_SCORE), 5), q1 = round(quantile(MEAN_SCORE, 0.25), 5), q3 = round(quantile(MEAN_SCORE, 0.75), 5), min = round(min(MEAN_SCORE), 5), max = round(max(MEAN_SCORE), 5), .groups = "drop" ) # Show all rows and columns (no truncation) options(tibble.width = Inf) print(desc_stats, n = Inf) ``` Interpretations: 1. Mean and median values are similar, indicating distribution is relatively symmetric and any skew is minimal. Any outliers are not extreme. 2. Highest to lowest group n size ratio is 1.14 (139/122). Acceptable ratio for ANOVA (under 1.5). 3. Highest to lowest overall group variance ratio is 1.67 (7.93/4.74). Acceptable ratio for ANOVA (under 4). For the sake of consistency w/ the other EHI studies, I also calculated Hartley's F-max ratio. The conservative F-max critical value is 1.60, which is still higher than the highest observed F-max ratio of 1.53. # Assumption checks ## Missing values ```{r missing} missing_summary <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n_total = n(), n_missing = sum(is.na(MEAN_SCORE)), pct_missing = round(100 * n_missing / n_total, 2), .groups = "drop" ) print(missing_summary, n = Inf) ``` No missing values. As expected. ## Outliers ```{r outliers} outlier_summary <- long_data %>% group_by(perspective, temporalDO, TIME, DOMAIN) %>% summarise( n = n(), n_outliers = sum(abs(scale(MEAN_SCORE)) > 3), .groups = "drop" ) print(outlier_summary, n = Inf) ``` No outliers present. Good. ## Homogeneity of variance ```{r homogeneity} homogeneity_between <- long_data %>% group_by(TIME, DOMAIN) %>% rstatix::levene_test(MEAN_SCORE ~ perspective * temporalDO) print(homogeneity_between, n = Inf) ``` Levene's test is sigifnicant for two cells: fut-pers and fut-val. However, variance ratios and F-max tests show that any heteroscedasticity is mild. ## Normality (within-subjects residuals) ```{r normality} resid_within <- long_data %>% group_by(pID) %>% mutate(person_mean = mean(MEAN_SCORE, na.rm = TRUE)) %>% ungroup() %>% mutate(resid = MEAN_SCORE - person_mean) %>% pull(resid) resid_within <- resid_within[!is.na(resid_within)] n_resid <- length(resid_within) if (n_resid < 3L) { message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.") } else { resid_for_shapiro <- if (n_resid > 5000L) { set.seed(1L) sample(resid_within, 5000L) } else { resid_within } print(shapiro.test(resid_for_shapiro)) } ``` ### Q-Q plot ```{r qqplot, fig.height = 4} qqnorm(resid_within) qqline(resid_within) ``` Shapiro-Wilk is significant but is sensitive to large sample size. QQ plot shows that centre residuals are normally distributed, with some tail heaviness. ANOVA is robust to violations of normality w/ large sample size. Overall, ANOVA can proceed. # Mixed ANOVA ```{r anova} aov_afex <- aov_ez( id = "pID", dv = "MEAN_SCORE", data = long_data, between = c("perspective", "temporalDO"), within = c("TIME", "DOMAIN"), type = 3 ) cat("\n--- ANOVA Table (Type 3, uncorrected) ---\n") print(nice(aov_afex, correction = "none")) cat("\n--- ANOVA Table (Type 3, Greenhouse–Geisser correction) ---\n") print(nice(aov_afex, correction = "GG")) ``` Mauchly's test of sphericity is sig for DOMAIN main effect and interaction (except w/ TIME). Use GG correction for these: - 8 DOMAIN 1.94, 1004.66 1.21 0.63 <.001, p = .529 ## 9 perspective:DOMAIN 1.94, 1004.66 1.21 7.79 *** <.001, p <.001 ## 10 temporalDO:DOMAIN 1.94, 1004.66 1.21 0.76 <.001, p = .466 ## 11 perspective:temporalDO:DOMAIN 1.94, 1004.66 1.21 0.17 <.001, p = .837 The following are significant main effects and interactions: ## 15 perspective:temporalDO:TIME:DOMAIN 2, 1036 0.75 3.11 * <.001 .045 ## 13 perspective:TIME:DOMAIN 2, 1036 0.75 3.58 * <.001 .028 ## 9 perspective:DOMAIN 1.94, 1004.66 1.21 7.79 *** <.001, p <.001 (GG corrected) ## 6 temporalDO:TIME 1, 518 1.86 9.81 ** <.001 .002 ## 7 perspective:temporalDO:TIME 1, 518 1.86 7.91 ** <.001 .005 ## 4 TIME 1, 518 1.86 10.05 ** <.001 .002 # Mauchly and epsilon ```{r mauchly} anova_wide <- anova_data %>% select(pID, perspective, temporalDO, all_of(within_vars)) %>% filter(if_all(all_of(within_vars), ~ !is.na(.))) response_matrix <- as.matrix(anova_wide[, within_vars]) rm_model <- lm(response_matrix ~ perspective * temporalDO, data = anova_wide) idata <- data.frame( TIME = factor(rep(c("past", "fut"), each = 3), levels = c("past", "fut")), DOMAIN = factor(rep(c("pref", "pers", "val"), 2), levels = c("pref", "pers", "val")) ) rm_anova <- car::Anova(rm_model, idata = idata, idesign = ~ TIME * DOMAIN, type = 3) rm_summary <- summary(rm_anova, multivariate = FALSE) if (!is.null(rm_summary$sphericity.tests)) { cat("\nMauchly's Test of Sphericity:\n") print(rm_summary$sphericity.tests) } if (!is.null(rm_summary$epsilon)) { cat("\nEpsilon (GG, HF):\n") print(rm_summary$epsilon) } ``` # Post-hoc comparisons ## TIME (main effect) ```{r posthoc-TIME} emm_TIME <- emmeans(aov_afex, ~ TIME) print(pairs(emm_TIME, adjust = "bonferroni")) ``` Pairwise comparison provide supprot for EHI effect. ## temporalDO:TIME ```{r posthoc-temporalDO-TIME} emm_temporalDO_TIME <- emmeans(aov_afex, ~ TIME | temporalDO) print(pairs(emm_temporalDO_TIME, adjust = "bonferroni")) ``` Contrast significant only for temporal display order of past first, then future. When grouped by time instead of temporalDO, no contrasts are significant. ## perspective:temporalDO:TIME ```{r posthoc-pt-TIME} emm_pt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO) print(pairs(emm_pt_TIME, adjust = "bonferroni")) ``` EHI is significant only for self perspective and past first temporal display order. When grouped by perspective or temporalDO instead of TIME, no contrasts are significant. ## perspective:DOMAIN ```{r posthoc-perspective-DOMAIN} emm_perspective_DOMAIN <- emmeans(aov_afex, ~ perspective | DOMAIN) print(pairs(emm_perspective_DOMAIN, adjust = "bonferroni")) emm_perspective_DOMAIN_domain <- emmeans(aov_afex, ~ DOMAIN | perspective) print(pairs(emm_perspective_DOMAIN_domain, adjust = "bonferroni")) ``` significance is driven by the change from preferences to values in the "other" perspective. ## perspective:TIME:DOMAIN ```{r posthoc-pt-DOMAIN} emm_pt_TIME_domain <- emmeans(aov_afex, ~ TIME | perspective + DOMAIN) print(pairs(emm_pt_TIME_domain, adjust = "bonferroni")) ``` EHI effects present for other-perspective in the preferences domain and for self-perspective in the values domain. Estimate is higher in the self-perspective than in the other-perspective. ```{r posthoc-pt-DOMAIN-domain} emm_pt_domain_domain <- emmeans(aov_afex, ~ DOMAIN | perspective + TIME) print(pairs(emm_pt_domain_domain, adjust = "bonferroni")) ``` Significant contrasts are driven by domain changes from preferences to values in the self vs other perspectives, in the past-oriented questions. Trends reverse depending on perspective, where values have higher estimates than preferences in the self-perspective, but lower estimates than preferences in the other-perspective. ## perspective:temporalDO:TIME:DOMAIN ```{r posthoc-ptt-TIME} emm_ptt_TIME <- emmeans(aov_afex, ~ TIME | perspective + temporalDO + DOMAIN) print(pairs(emm_ptt_TIME, adjust = "bonferroni")) ``` EHI effects are present for three contrasts: - past - fut 0.2806 0.118 518 2.380 0.0177 (other-perspective, preferences domain, past-first temporal display order) - past - fut 0.4358 0.138 518 3.156 0.0017 (self-perspective, personality domain, past-first temporal display order) - past - fut 0.7276 0.141 518 5.169 <0.0001 (self-perspective, values domain, past-first temporal display order) A reverse EHI effect is present for 1 contrast: - past - fut -0.2367 0.118 518 -2.001 0.0459 (self-personality, preferences domain, future-first temporal display order) ```{r posthoc-ptt-perspective} emm_ptt_perspective <- emmeans(aov_afex, ~ perspective | temporalDO + TIME + DOMAIN) print(pairs(emm_ptt_perspective, adjust = "bonferroni")) ``` 1 significant contrast: - other - self -0.6972 0.314 518 -2.220 0.0268 (values domain, past-oriented questions, past-first temporal display order) not really of theoretical interest but speaks to the perspective:TIME:DOMAIN interaction. no significant contrasts when grouped by temporalDO instead of TIME or perspective. ## Cohen's d (significant contrasts only) ```{r cohens-d-significant} d_data <- anova_data %>% mutate( past_mean = (past_pref_MEAN + past_pers_MEAN + past_val_MEAN) / 3, fut_mean = (fut_pref_MEAN + fut_pers_MEAN + fut_val_MEAN) / 3, pref_mean = (past_pref_MEAN + fut_pref_MEAN) / 2, pers_mean = (past_pers_MEAN + fut_pers_MEAN) / 2, val_mean = (past_val_MEAN + fut_val_MEAN) / 2 ) cohens_d_results <- tibble( contrast = character(), condition = character(), d = double() ) # TIME main: past vs fut cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "overall", d = suppressMessages(effectsize::cohens_d(d_data$past_mean, d_data$fut_mean, paired = TRUE)$Cohens_d) ) # temporalDO:TIME: past vs fut for temporalDO = past d_past_tdo <- d_data %>% filter(temporalDO == "past") cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "temporalDO = past", d = suppressMessages(effectsize::cohens_d(d_past_tdo$past_mean, d_past_tdo$fut_mean, paired = TRUE)$Cohens_d) ) # perspective:temporalDO:TIME: past vs fut for self, past temporalDO d_self_past <- d_data %>% filter(perspective == "self", temporalDO == "past") cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "self, temporalDO = past", d = suppressMessages(effectsize::cohens_d(d_self_past$past_mean, d_self_past$fut_mean, paired = TRUE)$Cohens_d) ) # perspective:DOMAIN: pref vs val for perspective = other d_other <- d_data %>% filter(perspective == "other") cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - val)", condition = "perspective = other", d = suppressMessages(effectsize::cohens_d(d_other$pref_mean, d_other$val_mean, paired = TRUE)$Cohens_d) ) # perspective:TIME:DOMAIN - TIME: other, pref d_other_pref <- d_data %>% filter(perspective == "other") cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "other, pref domain", d = suppressMessages(effectsize::cohens_d(d_other_pref$past_pref_MEAN, d_other_pref$fut_pref_MEAN, paired = TRUE)$Cohens_d) ) # perspective:TIME:DOMAIN - TIME: self, val d_self_val <- d_data %>% filter(perspective == "self") cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "self, val domain", d = suppressMessages(effectsize::cohens_d(d_self_val$past_val_MEAN, d_self_val$fut_val_MEAN, paired = TRUE)$Cohens_d) ) # perspective:TIME:DOMAIN - DOMAIN: other, past TIME d_other_past <- d_data %>% filter(perspective == "other") cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - val)", condition = "other, past TIME", d = suppressMessages(effectsize::cohens_d(d_other_past$past_pref_MEAN, d_other_past$past_val_MEAN, paired = TRUE)$Cohens_d) ) # perspective:TIME:DOMAIN - DOMAIN: self, past TIME: pref - pers d_self_past_t <- d_data %>% filter(perspective == "self") cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - pers)", condition = "self, past TIME", d = suppressMessages(effectsize::cohens_d(d_self_past_t$past_pref_MEAN, d_self_past_t$past_pers_MEAN, paired = TRUE)$Cohens_d) ) # perspective:TIME:DOMAIN - DOMAIN: self, past TIME: pref - val cohens_d_results <- cohens_d_results %>% add_row( contrast = "DOMAIN (pref - val)", condition = "self, past TIME", d = suppressMessages(effectsize::cohens_d(d_self_past_t$past_pref_MEAN, d_self_past_t$past_val_MEAN, paired = TRUE)$Cohens_d) ) # 4-way TIME: self, future temporalDO, pref (reverse EHI) d_self_fut_pref <- d_data %>% filter(perspective == "self", temporalDO == "future") cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut) [reverse]", condition = "self, future temporalDO, pref domain", d = suppressMessages(effectsize::cohens_d(d_self_fut_pref$past_pref_MEAN, d_self_fut_pref$fut_pref_MEAN, paired = TRUE)$Cohens_d) ) # 4-way TIME: other, past temporalDO, pref d_other_past_pref <- d_data %>% filter(perspective == "other", temporalDO == "past") cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "other, past temporalDO, pref domain", d = suppressMessages(effectsize::cohens_d(d_other_past_pref$past_pref_MEAN, d_other_past_pref$fut_pref_MEAN, paired = TRUE)$Cohens_d) ) # 4-way TIME: self, past temporalDO, pers cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "self, past temporalDO, pers domain", d = suppressMessages(effectsize::cohens_d(d_self_past$past_pers_MEAN, d_self_past$fut_pers_MEAN, paired = TRUE)$Cohens_d) ) # 4-way TIME: self, past temporalDO, val cohens_d_results <- cohens_d_results %>% add_row( contrast = "TIME (past - fut)", condition = "self, past temporalDO, val domain", d = suppressMessages(effectsize::cohens_d(d_self_past$past_val_MEAN, d_self_past$fut_val_MEAN, paired = TRUE)$Cohens_d) ) # 4-way perspective: past temporalDO, past TIME, val domain (other - self, between-subjects) d_ptt_val <- d_data %>% filter(temporalDO == "past") %>% select(perspective, past_val_MEAN) d_other_ptt <- d_ptt_val %>% filter(perspective == "other") %>% pull(past_val_MEAN) d_self_ptt <- d_ptt_val %>% filter(perspective == "self") %>% pull(past_val_MEAN) cohens_d_results <- cohens_d_results %>% add_row( contrast = "perspective (other - self)", condition = "past temporalDO, past TIME, val domain", d = suppressMessages(effectsize::cohens_d(d_other_ptt, d_self_ptt, paired = FALSE)$Cohens_d) ) cohens_d_results %>% mutate(d = round(d, 3)) %>% print(n = Inf) ```