eohi/.history/eohi1/mixed anova - domain means_20250916100623.r
2025-12-23 15:47:09 -05:00

831 lines
33 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Display basic information about the dataset
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))
# Verify the specific variables we need
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
print("All required domain mean variables found!")
}
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
# Domain mapping created
# Efficient data pivoting using pivot_longer
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and DOMAIN
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TIME and DOMAIN:")
print(desc_stats)
# Descriptive statistics by between-subjects factors
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
# 1. Missing values check
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print("Missing values by TIME and DOMAIN:")
print(missing_summary)
# 2. Outlier detection
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (streamlined)
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across TIME within each DOMAIN:")
print(homogeneity_time)
# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across DOMAIN within each TIME:")
print(homogeneity_domain)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Function to calculate Hartley's F-max ratio
calculate_hartley_ratio <- function(variances) {
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))
print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# More efficient bootstrap function for Hartley's F-max test
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
# Get unique groups and their sample sizes
groups <- unique(data[[group_var]])
# Calculate observed variances for each group
observed_vars <- data %>%
dplyr::group_by(!!sym(group_var)) %>%
dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
dplyr::pull(var)
# Handle invalid variances
if(any(observed_vars <= 0 | is.na(observed_vars))) {
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
}
# Calculate observed F-max ratio
observed_ratio <- max(observed_vars) / min(observed_vars)
# Pre-allocate storage for bootstrap ratios
bootstrap_ratios <- numeric(n_iter)
# Get group data once
group_data_list <- map(groups, ~ {
group_data <- data[data[[group_var]] == .x, response_var]
group_data[!is.na(group_data)]
})
# Bootstrap with pre-allocated storage
for(i in 1:n_iter) {
# Bootstrap sample from each group independently
sample_vars <- map_dbl(group_data_list, ~ {
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
var(bootstrap_sample, na.rm = TRUE)
})
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
}
# Remove invalid ratios
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
if(length(valid_ratios) == 0) {
stop("No valid bootstrap ratios generated")
}
# Calculate critical value (95th percentile)
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
# Return only essential information
return(list(
observed_ratio = observed_ratio,
critical_95 = critical_95,
n_valid_iterations = length(valid_ratios)
))
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check for missing data patterns
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, useNA = "ifany")
# Check data balance
xtabs(~ pID + TIME + DOMAIN, data = long_data_clean)
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of DOMAIN levels:", length(levels(long_data_clean$DOMAIN))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
print(summary(as.vector(design_balance)))
# Check for any participants with missing combinations
missing_combos <- long_data_clean %>%
group_by(pID) %>%
summarise(
n_combinations = n(),
expected_combinations = 8, # 2 TIME × 4 DOMAIN = 8
missing_combinations = 8 - n_combinations,
.groups = 'drop'
)
print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
data = long_data_clean)
print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))
# Alternative: Using afex::aov_ez for cleaner output (optional)
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
dv = "MEAN_DIFFERENCE",
data = long_data_clean,
between = "TEMPORAL_DO",
within = c("TIME", "DOMAIN"))
print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Sphericity tests using ezANOVA (library already loaded)
print("\n=== SPHERICITY TESTS ===")
# Test sphericity for DOMAIN (4 levels - within-subjects)
print("Mauchly's Test of Sphericity for DOMAIN:")
tryCatch({
# Create a temporary data frame for ezANOVA
temp_data <- long_data_clean
temp_data$id <- as.numeric(as.factor(temp_data$pID))
# Run ezANOVA to get sphericity tests
ez_domain <- ezANOVA(data = temp_data,
dv = MEAN_DIFFERENCE,
wid = id,
between = TEMPORAL_DO,
within = DOMAIN,
type = 3,
detailed = TRUE)
print("DOMAIN Sphericity Test:")
print(ez_domain$Mauchly)
}, error = function(e) {
print(paste("Error in DOMAIN sphericity test:", e$message))
})
# Test sphericity for TIME (2 levels - within-subjects)
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
ez_time <- ezANOVA(data = temp_data,
dv = MEAN_DIFFERENCE,
wid = id,
between = TEMPORAL_DO,
within = TIME,
type = 3,
detailed = TRUE)
print("TIME Sphericity Test:")
print(ez_time$Mauchly)
}, error = function(e) {
print(paste("Error in TIME sphericity test:", e$message))
})
# Test sphericity for TIME × DOMAIN interaction
print("\nMauchly's Test of Sphericity for TIME × DOMAIN Interaction:")
tryCatch({
ez_interaction <- ezANOVA(data = temp_data,
dv = MEAN_DIFFERENCE,
wid = id,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
print("TIME × DOMAIN Sphericity Test:")
print(ez_interaction$Mauchly)
}, error = function(e) {
print(paste("Error in TIME × DOMAIN sphericity test:", e$message))
})
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
# Get corrected results from ezANOVA
ez_corrected <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)
# Show epsilon values for sphericity corrections
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)
# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
# Check if sphericity corrections are available
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
print(ez_corrected$`Sphericity Corrections`)
# Extract and display corrected degrees of freedom
cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
# Get the sphericity corrections
sphericity_corr <- ez_corrected$`Sphericity Corrections`
# Extract original degrees of freedom from ANOVA table
anova_table <- ez_corrected$ANOVA
# Calculate corrected degrees of freedom
corrected_df <- data.frame(
Effect = sphericity_corr$Effect,
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
GG_epsilon = sphericity_corr$GGe,
HF_epsilon = sphericity_corr$HFe
)
print(corrected_df)
# Also show the corrected F-values and p-values with degrees of freedom
cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
for(i in 1:nrow(corrected_df)) {
effect <- corrected_df$Effect[i]
f_value <- anova_table$F[match(effect, anova_table$Effect)]
cat(sprintf("\n%s:\n", effect))
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
}
} else {
print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================
print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
# Create a wide-format data for car package (library already loaded)
tryCatch({
# Convert to wide format for car package
wide_data <- long_data_clean %>%
select(pID, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
pivot_wider(names_from = c(TIME, DOMAIN),
values_from = MEAN_DIFFERENCE,
names_sep = "_")
# Create the repeated measures design
within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
"Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
# Check if all columns exist
missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
if(length(missing_cols) > 0) {
print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
} else {
# Create the repeated measures design
rm_design <- as.matrix(wide_data[, within_vars])
# Calculate epsilon values
print("Epsilon Values from car package:")
epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
# Interpretation
if(epsilon_gg < 0.75) {
print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
} else if(epsilon_hf > 0.75) {
print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
} else {
print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
}
# =============================================================================
# MANUAL SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
# Apply corrections to the original ANOVA results
print("Applying Greenhouse-Geisser corrections to DOMAIN effects:")
# DOMAIN main effect (DFn = 3, DFd = 3183)
domain_df_corrected_gg <- 3 * epsilon_gg
domain_df_corrected_hf <- 3 * epsilon_hf
print(paste("DOMAIN: Original df = 3, 3183"))
print(paste("DOMAIN: GG corrected df =", round(domain_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
print(paste("DOMAIN: HF corrected df =", round(domain_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
# TIME × DOMAIN interaction (DFn = 3, DFd = 3183)
interaction_df_corrected_gg <- 3 * epsilon_gg
interaction_df_corrected_hf <- 3 * epsilon_hf
print(paste("TIME × DOMAIN: Original df = 3, 3183"))
print(paste("TIME × DOMAIN: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(3183 * epsilon_gg, 2)))
print(paste("TIME × DOMAIN: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(3183 * epsilon_hf, 2)))
# Note: You would need to recalculate p-values with these corrected dfs
print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
print("The ezANOVA function should handle this automatically, but may not display the corrections")
}
}, error = function(e) {
print(paste("Error in manual epsilon calculation:", e$message))
})
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
# Effect size calculations (library already loaded)
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
# Extract effect sizes from aov model
aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
print(round(aov_effects, 5))
}, error = function(e) {
print(paste("Error calculating effect sizes from aov:", e$message))
})
# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
# ezANOVA provides partial eta squared, convert to generalized
ez_effects <- ez_corrected$ANOVA
ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
print("Generalized Eta Squared from ezANOVA:")
print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})
# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
anova_summary <- summary(mixed_anova_model)
# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")
# Main effect of TIME
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of DOMAIN
print("\nMain Effect of DOMAIN:")
domain_emmeans <- emmeans(mixed_anova_model, ~ DOMAIN)
domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
print(domain_contrasts)
# Main effect of TEMPORAL_DO
print("\nMain Effect of TEMPORAL_DO:")
temporal_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO)
temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
print(temporal_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# TEMPORAL_DO × TIME Interaction (Significant: p = 0.001)
print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
temporal_time_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME)
print("Estimated Marginal Means:")
print(temporal_time_emmeans)
print("\nSimple Effects of TIME within each TEMPORAL_DO:")
temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_time_simple)
print("\nSimple Effects of TEMPORAL_DO within each TIME:")
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
print(temporal_time_simple2)
# TIME × DOMAIN Interaction (Significant: p = 0.012)
print("\n=== TIME × DOMAIN INTERACTION ===")
time_domain_emmeans <- emmeans(mixed_anova_model, ~ TIME * DOMAIN)
print("Estimated Marginal Means:")
print(time_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TIME:")
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
print(time_domain_simple)
print("\nSimple Effects of TIME within each DOMAIN:")
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(time_domain_simple2)
# TEMPORAL_DO × DOMAIN Interaction (Marginal: p = 0.058)
print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
temporal_domain_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * DOMAIN)
print("Estimated Marginal Means:")
print(temporal_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_domain_simple)
print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(temporal_domain_simple2)
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================
# All pairwise comparisons for the three-way interaction
print("\n=== COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS ===")
three_way_emmeans <- emmeans(mixed_anova_model, ~ TEMPORAL_DO * TIME * DOMAIN)
print("Estimated Marginal Means for all combinations:")
print(three_way_emmeans)
# Pairwise comparisons within each TEMPORAL_DO × TIME combination
print("\nPairwise comparisons of DOMAIN within each TEMPORAL_DO × TIME combination:")
three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "TIME"), adjust = "bonferroni")
print(three_way_contrasts)
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d calculations (library already loaded)
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions
if(group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
# =============================================================================
# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
# =============================================================================
print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
# Get simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# Get simple effects of TEMPORAL_DO within each TIME
temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Get simple effects of TIME within each DOMAIN
time_domain_simple2_df <- as.data.frame(time_domain_simple2)
calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
# Get simple effects of DOMAIN within each TIME
time_domain_simple_df <- as.data.frame(time_domain_simple)
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
# =============================================================================
# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
# =============================================================================
print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
# Get simple effects of TEMPORAL_DO within each DOMAIN
temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
# Get simple effects of DOMAIN within each TEMPORAL_DO
temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")