eohi/eohi2/mixed anova - domain means.r
2025-12-23 15:47:09 -05:00

927 lines
37 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Mixed ANOVA Analysis for Domain Means - EOHI2
# EOHI Experiment Data Analysis - Domain Level Analysis with INTERVAL factor
# Variables: NPast_5_pref_MEAN, NPast_5_pers_MEAN, NPast_5_val_MEAN, etc.
# NFut_5_pref_MEAN, NFut_5_pers_MEAN, NFut_5_val_MEAN, etc.
# NPast_10_pref_MEAN, NPast_10_pers_MEAN, NPast_10_val_MEAN, etc.
# NFut_10_pref_MEAN, NFut_10_pers_MEAN, NFut_10_val_MEAN, etc.
# 5.10past_pref_MEAN, 5.10past_pers_MEAN, 5.10past_val_MEAN
# 5.10fut_pref_MEAN, 5.10fut_pers_MEAN, 5.10fut_val_MEAN
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data
data <- read.csv("eohi2.csv")
# Display basic information about the dataset
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$ResponseId))))
# Verify the specific variables we need
required_vars <- c("NPast_5_pref_MEAN", "NPast_5_pers_MEAN", "NPast_5_val_MEAN",
"NPast_10_pref_MEAN", "NPast_10_pers_MEAN", "NPast_10_val_MEAN",
"NFut_5_pref_MEAN", "NFut_5_pers_MEAN", "NFut_5_val_MEAN",
"NFut_10_pref_MEAN", "NFut_10_pers_MEAN", "NFut_10_val_MEAN",
"X5.10past_pref_MEAN", "X5.10past_pers_MEAN", "X5.10past_val_MEAN",
"X5.10fut_pref_MEAN", "X5.10fut_pers_MEAN", "X5.10fut_val_MEAN")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
print("All required domain mean variables found!")
}
# Define domain mapping with TIME, DOMAIN, and INTERVAL factors
domain_mapping <- data.frame(
variable = required_vars,
time = c(rep("Past", 3), rep("Past", 3), rep("Future", 3), rep("Future", 3),
rep("Past", 3), rep("Future", 3)),
domain = rep(c("Preferences", "Personality", "Values"), 6),
interval = c(rep("5", 3), rep("10", 3), rep("5", 3), rep("10", 3),
rep("5_10", 3), rep("5_10", 3)),
stringsAsFactors = FALSE
)
print(domain_mapping)
# Efficient data pivoting using pivot_longer
long_data <- data %>%
select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values")),
INTERVAL = factor(interval, levels = c("5", "10", "5_10")),
ResponseId = as.factor(ResponseId),
TEMPORAL_DO = as.factor(TEMPORAL_DO),
INTERVAL_DO = as.factor(INTERVAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(ResponseId, TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$ResponseId))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
desc_stats <- long_data %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
# Descriptive statistics by between-subjects factors
desc_stats_by_between <- long_data %>%
group_by(TEMPORAL_DO, INTERVAL_DO, TIME, DOMAIN, INTERVAL) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_between)
# Summary by between-subjects factors only
desc_stats_between_only <- long_data %>%
group_by(TEMPORAL_DO, INTERVAL_DO) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_between_only)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
# 1. Missing values check
missing_summary <- long_data %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
# 2. Outlier detection
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
# 3. Anderson-Darling normality test
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN, INTERVAL) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
homogeneity_domain <- long_data_clean %>%
group_by(TIME, INTERVAL) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
homogeneity_interval <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ INTERVAL)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_interval)
# 5. Hartley's F-max test for between-subjects factors
# Check what values the between-subjects factors actually have
print(unique(long_data_clean$TEMPORAL_DO))
print(unique(long_data_clean$INTERVAL_DO))
# Function to calculate Hartley's F-max ratio
calculate_hartley_ratio <- function(variances) {
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN × INTERVAL combination
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, INTERVAL, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across INTERVAL_DO within each TIME × DOMAIN × TEMPORAL_DO combination
observed_interval_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN, TEMPORAL_DO) %>%
summarise(
# Calculate variances for each INTERVAL_DO level within this combination
int5_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "5"], na.rm = TRUE),
int10_var = var(MEAN_DIFFERENCE[INTERVAL_DO == "10"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(int5_var, int10_var) / min(int5_var, int10_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, TEMPORAL_DO, int5_var, int10_var, f_max_ratio)
print(observed_interval_ratios)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$ResponseId))))
print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
length(levels(long_data_clean$DOMAIN)), "), INTERVAL (",
length(levels(long_data_clean$INTERVAL)), "), TEMPORAL_DO (",
length(levels(long_data_clean$TEMPORAL_DO)), "), INTERVAL_DO (",
length(levels(long_data_clean$INTERVAL_DO)), ")", sep = ""))
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
# Check if design is balanced
design_balance <- table(long_data_clean$ResponseId, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
# =============================================================================
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
# =============================================================================
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT) × INTERVAL_DO (2 levels: 5, 10)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = ResponseId,
between = .(TEMPORAL_DO, INTERVAL_DO),
within = .(TIME, DOMAIN, INTERVAL),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
# Show Mauchly's test for sphericity
print(mixed_anova_model$Mauchly)
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
print(mixed_anova_model$`Sphericity Corrections`)
# Extract and display corrected degrees of freedom
print("\n=== CORRECTED DEGREES OF FREEDOM ===")
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
anova_table <- mixed_anova_model$ANOVA
corrected_df <- data.frame(
Effect = sphericity_corr$Effect,
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
GG_epsilon = sphericity_corr$GGe,
HF_epsilon = sphericity_corr$HFe
)
print(corrected_df)
print("\n=== CORRECTED F-TESTS ===")
# Between-subjects effects (no sphericity corrections needed)
print("\nBETWEEN-SUBJECTS EFFECTS:")
between_effects <- c("TEMPORAL_DO", "INTERVAL_DO", "TEMPORAL_DO:INTERVAL_DO")
for(effect in between_effects) {
if(effect %in% anova_table$Effect) {
f_value <- anova_table$F[anova_table$Effect == effect]
dfn <- anova_table$DFn[anova_table$Effect == effect]
dfd <- anova_table$DFd[anova_table$Effect == effect]
p_value <- anova_table$p[anova_table$Effect == effect]
print(sprintf("%s: F(%d, %d) = %.3f, p = %.6f", effect, dfn, dfd, f_value, p_value))
}
}
# Within-subjects effects (sphericity corrections where applicable)
print("\nWITHIN-SUBJECTS EFFECTS:")
# TIME main effect (2 levels, sphericity automatically satisfied)
if("TIME" %in% anova_table$Effect) {
f_value <- anova_table$F[anova_table$Effect == "TIME"]
dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
p_value <- anova_table$p[anova_table$Effect == "TIME"]
print(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)", dfn, dfd, f_value, p_value))
}
# DOMAIN main effect (3 levels, needs sphericity correction)
if("DOMAIN" %in% anova_table$Effect) {
f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
print(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value))
}
# INTERVAL main effect (3 levels, needs sphericity correction)
if("INTERVAL" %in% anova_table$Effect) {
f_value <- anova_table$F[anova_table$Effect == "INTERVAL"]
dfn <- anova_table$DFn[anova_table$Effect == "INTERVAL"]
dfd <- anova_table$DFd[anova_table$Effect == "INTERVAL"]
p_value <- anova_table$p[anova_table$Effect == "INTERVAL"]
print(sprintf("INTERVAL: F(%d, %d) = %.3f, p = %.6f", dfn, dfd, f_value, p_value))
}
# Interactions with sphericity corrections
print("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:")
for(i in seq_len(nrow(corrected_df))) {
effect <- corrected_df$Effect[i]
f_value <- anova_table$F[match(effect, anova_table$Effect)]
print(sprintf("\n%s:", effect))
print(sprintf(" Original: F(%d, %d) = %.3f",
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
print(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f",
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
print(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f",
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
}
} else {
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
}
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
# Extract generalized eta squared from ezANOVA (already calculated)
effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
effect_sizes$ges <- round(effect_sizes$ges, 5)
print(effect_sizes)
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans
# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * INTERVAL_DO * TIME * DOMAIN * INTERVAL + Error(ResponseId/(TIME * DOMAIN * INTERVAL)),
data = long_data_clean)
# Main effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of DOMAIN
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
print(domain_contrasts)
# Main effect of INTERVAL
interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
print(interval_emmeans)
interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
print(interval_contrasts)
# Main effect of TEMPORAL_DO
temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
print(temporal_contrasts)
# Main effect of INTERVAL_DO
interval_do_emmeans <- emmeans(aov_model, ~ INTERVAL_DO)
interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
print(interval_do_contrasts)
# =============================================================================
# COHEN'S D FOR MAIN EFFECTS
# =============================================================================
# Main Effect of TIME (if significant)
time_main_contrast <- pairs(time_emmeans, adjust = "none")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
print("\nCohen's d for TIME main effect:")
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
time_cohens_d <- cohen.d(time_past_data, time_future_data)
print(sprintf("Past vs Future: n1 = %d, n2 = %d", length(time_past_data), length(time_future_data)))
print(sprintf("Cohen's d: %.5f", time_cohens_d$estimate))
print(sprintf("Effect size interpretation: %s", time_cohens_d$magnitude))
print(sprintf("p-value: %.5f", time_main_df$p.value[1]))
}
# Main Effect of DOMAIN (if significant)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
print("\nCohen's d for significant DOMAIN contrasts:")
for(i in seq_len(nrow(significant_domain))) {
contrast_name <- as.character(significant_domain$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
if(length(data1) > 0 && length(data2) > 0) {
domain_cohens_d <- cohen.d(data1, data2)
print(sprintf("Comparison: %s", contrast_name))
print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2)))
print(sprintf(" Cohen's d: %.5f", domain_cohens_d$estimate))
print(sprintf(" Effect size interpretation: %s", domain_cohens_d$magnitude))
print(sprintf(" p-value: %.5f", significant_domain$p.value[i]))
print("")
}
}
}
}
# Main Effect of INTERVAL (if significant)
interval_main_contrast <- pairs(interval_emmeans, adjust = "bonferroni")
interval_main_df <- as.data.frame(interval_main_contrast)
print(interval_main_df)
# Calculate Cohen's d for significant INTERVAL contrasts
significant_interval <- interval_main_df[interval_main_df$p.value < 0.05, ]
if(nrow(significant_interval) > 0) {
print("\nCohen's d for significant INTERVAL contrasts:")
for(i in seq_len(nrow(significant_interval))) {
contrast_name <- as.character(significant_interval$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$INTERVAL == level2]
if(length(data1) > 0 && length(data2) > 0) {
interval_cohens_d <- cohen.d(data1, data2)
print(sprintf("Comparison: %s", contrast_name))
print(sprintf(" n1 = %d, n2 = %d", length(data1), length(data2)))
print(sprintf(" Cohen's d: %.5f", interval_cohens_d$estimate))
print(sprintf(" Effect size interpretation: %s", interval_cohens_d$magnitude))
print(sprintf(" p-value: %.5f", significant_interval$p.value[i]))
print("")
}
}
}
}
# =============================================================================
# INTERACTION EXPLORATIONS (if significant)
# =============================================================================
# First, identify which interactions are significant
significant_interactions <- anova_output[anova_output$p < 0.05 & grepl(":", anova_output$Effect), ]
if(nrow(significant_interactions) > 0) {
print("Significant interactions found:")
print(significant_interactions[, c("Effect", "p")])
# TIME × DOMAIN interaction (if significant)
if("TIME:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:DOMAIN"] < 0.05) {
print("\n=== TIME × DOMAIN INTERACTION (SIGNIFICANT) ===")
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
print("Estimated Marginal Means:")
print(time_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TIME:")
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
print(time_domain_simple)
print("\nSimple Effects of TIME within each DOMAIN:")
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(time_domain_simple2)
}
# TIME × INTERVAL interaction (if significant)
if("TIME:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TIME:INTERVAL"] < 0.05) {
print("\n=== TIME × INTERVAL INTERACTION (SIGNIFICANT) ===")
time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
print("Estimated Marginal Means:")
print(time_interval_emmeans)
print("\nSimple Effects of INTERVAL within each TIME:")
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
print(time_interval_simple)
print("\nSimple Effects of TIME within each INTERVAL:")
time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
print(time_interval_simple2)
}
# DOMAIN × INTERVAL interaction (if significant)
if("DOMAIN:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "DOMAIN:INTERVAL"] < 0.05) {
print("\n=== DOMAIN × INTERVAL INTERACTION (SIGNIFICANT) ===")
domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
print("Estimated Marginal Means:")
print(domain_interval_emmeans)
print("\nSimple Effects of INTERVAL within each DOMAIN:")
domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(domain_interval_simple)
print("\nSimple Effects of DOMAIN within each INTERVAL:")
domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
print(domain_interval_simple2)
}
# TEMPORAL_DO × TIME interaction (if significant)
if("TEMPORAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:TIME"] < 0.05) {
print("\n=== TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT) ===")
temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
print("Estimated Marginal Means:")
print(temporal_time_emmeans)
print("\nSimple Effects of TIME within each TEMPORAL_DO:")
temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_time_simple)
print("\nSimple Effects of TEMPORAL_DO within each TIME:")
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
print(temporal_time_simple2)
}
# TEMPORAL_DO × DOMAIN interaction (if significant)
if("TEMPORAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:DOMAIN"] < 0.05) {
print("\n=== TEMPORAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===")
temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
print("Estimated Marginal Means:")
print(temporal_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_domain_simple)
print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(temporal_domain_simple2)
}
# TEMPORAL_DO × INTERVAL interaction (if significant)
if("TEMPORAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "TEMPORAL_DO:INTERVAL"] < 0.05) {
print("\n=== TEMPORAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===")
temporal_interval_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * INTERVAL)
print("Estimated Marginal Means:")
print(temporal_interval_emmeans)
print("\nSimple Effects of INTERVAL within each TEMPORAL_DO:")
temporal_interval_simple <- pairs(temporal_interval_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_interval_simple)
print("\nSimple Effects of TEMPORAL_DO within each INTERVAL:")
temporal_interval_simple2 <- pairs(temporal_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
print(temporal_interval_simple2)
}
# INTERVAL_DO × TIME interaction (if significant)
if("INTERVAL_DO:TIME" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:TIME"] < 0.05) {
print("\n=== INTERVAL_DO × TIME INTERACTION (SIGNIFICANT) ===")
interval_do_time_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * TIME)
print("Estimated Marginal Means:")
print(interval_do_time_emmeans)
print("\nSimple Effects of TIME within each INTERVAL_DO:")
interval_do_time_simple <- pairs(interval_do_time_emmeans, by = "INTERVAL_DO", adjust = "bonferroni")
print(interval_do_time_simple)
print("\nSimple Effects of INTERVAL_DO within each TIME:")
interval_do_time_simple2 <- pairs(interval_do_time_emmeans, by = "TIME", adjust = "bonferroni")
print(interval_do_time_simple2)
}
# INTERVAL_DO × DOMAIN interaction (if significant)
if("INTERVAL_DO:DOMAIN" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:DOMAIN"] < 0.05) {
print("\n=== INTERVAL_DO × DOMAIN INTERACTION (SIGNIFICANT) ===")
interval_do_domain_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * DOMAIN)
print("Estimated Marginal Means:")
print(interval_do_domain_emmeans)
print("\nSimple Effects of DOMAIN within each INTERVAL_DO:")
interval_do_domain_simple <- pairs(interval_do_domain_emmeans, by = "INTERVAL_DO", adjust = "bonferroni")
print(interval_do_domain_simple)
print("\nSimple Effects of INTERVAL_DO within each DOMAIN:")
interval_do_domain_simple2 <- pairs(interval_do_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(interval_do_domain_simple2)
}
# INTERVAL_DO × INTERVAL interaction (if significant)
if("INTERVAL_DO:INTERVAL" %in% anova_output$Effect && anova_output$p[anova_output$Effect == "INTERVAL_DO:INTERVAL"] < 0.05) {
print("\n=== INTERVAL_DO × INTERVAL INTERACTION (SIGNIFICANT) ===")
interval_do_interval_emmeans <- emmeans(aov_model, ~ INTERVAL_DO * INTERVAL)
print("Estimated Marginal Means:")
print(interval_do_interval_emmeans)
print("\nSimple Effects of INTERVAL within each INTERVAL_DO:")
interval_do_interval_simple <- pairs(interval_do_interval_emmeans, by = "INTERVAL_DO", adjust = "bonferroni")
print(interval_do_interval_simple)
print("\nSimple Effects of INTERVAL_DO within each INTERVAL:")
interval_do_interval_simple2 <- pairs(interval_do_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
print(interval_do_interval_simple2)
}
# THREE-WAY INTERACTION: TIME × INTERVAL × TEMPORAL_DO
print("\n=== TIME × INTERVAL × TEMPORAL_DO THREE-WAY INTERACTION ===")
three_way_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL * TEMPORAL_DO)
print("Estimated Marginal Means:")
print(three_way_emmeans)
print("\nPast vs Future contrasts within each INTERVAL × TEMPORAL_DO combination:")
three_way_contrasts <- pairs(three_way_emmeans, by = c("INTERVAL", "TEMPORAL_DO"), adjust = "bonferroni")
print(three_way_contrasts)
# Calculate Cohen's d for Past vs Future contrasts
three_way_contrasts_df <- as.data.frame(three_way_contrasts)
print("\n=== COHEN'S D FOR TIME CONTRASTS IN THREE-WAY INTERACTION ===")
print("Past vs Future contrasts for all INTERVAL × TEMPORAL_DO combinations:")
for(i in seq_len(nrow(three_way_contrasts_df))) {
comparison <- three_way_contrasts_df[i, ]
# Extract the grouping variables
interval_level <- as.character(comparison$INTERVAL)
temporal_do_level <- as.character(comparison$TEMPORAL_DO)
# Get data for Past and Future within this INTERVAL × TEMPORAL_DO combination
past_data <- long_data_clean$MEAN_DIFFERENCE[
long_data_clean$INTERVAL == interval_level &
long_data_clean$TEMPORAL_DO == temporal_do_level &
long_data_clean$TIME == "Past"
]
future_data <- long_data_clean$MEAN_DIFFERENCE[
long_data_clean$INTERVAL == interval_level &
long_data_clean$TEMPORAL_DO == temporal_do_level &
long_data_clean$TIME == "Future"
]
if(length(past_data) > 0 && length(future_data) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(past_data, future_data)
cat(sprintf("INTERVAL = %s, TEMPORAL_DO = %s:\n", interval_level, temporal_do_level))
cat(sprintf(" Past vs Future comparison\n"))
cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
cat("\n")
}
}
} else {
print("No significant interactions found.")
print("All interaction p-values:")
interaction_effects <- anova_output[grepl(":", anova_output$Effect), ]
print(interaction_effects[, c("Effect", "p")])
}
# =============================================================================
# INTERACTION PLOT: TEMPORAL_DO × TIME × INTERVAL (Emmeans only)
# =============================================================================
# Define color palette for TIME
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
# Create emmeans for TEMPORAL_DO × TIME × INTERVAL
emm_3way <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * INTERVAL)
# Prepare emmeans data frame
emmeans_3way <- emm_3way %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
TIME = factor(TIME, levels = c("Past", "Future")),
INTERVAL = factor(INTERVAL),
x_pos = as.numeric(TEMPORAL_DO),
time_offset = (as.numeric(TIME) - 1.5) * 0.2,
x_dodged = x_pos + time_offset
)
# Create 3-way interaction plot with facets
interaction_plot_3way <- ggplot(emmeans_3way) +
geom_errorbar(
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.1,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c(
"5" = "Present v. 5 Years",
"10" = "Present v. 10 Years",
"5_10" = "5 Years v. 10 Years"
))) +
labs(
x = "Order",
y = "Mean absolute deviation",
title = "TEMPORAL_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
) +
scale_x_continuous(
breaks = c(1, 2),
labels = c("Past First", "Future First"),
limits = c(0.5, 2.5)
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
strip.text = element_text(size = 11, face = "bold")
)
print(interaction_plot_3way)
# =============================================================================
# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars)
# =============================================================================
# Prepare emmeans data frame for TIME main effect
time_emm_df <- time_emmeans %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TIME = factor(TIME, levels = c("Past", "Future"))
)
# Create TIME main-effect plot (style aligned with the 3-way interaction plot)
time_main_plot <- ggplot(time_emm_df) +
geom_errorbar(
aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.15,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Time",
y = "Mean absolute difference from present",
title = "Main Effect of TIME (Estimated Marginal Means)"
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(time_main_plot)