eohi/.history/eohi1/mixed anova - domain means_20251003142645.r
2025-12-23 15:47:09 -05:00

866 lines
34 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Display basic information about the dataset
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))
# Verify the specific variables we need
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
print("All required domain mean variables found!")
}
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
# Domain mapping created
# Efficient data pivoting using pivot_longer
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and DOMAIN
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TIME and DOMAIN:")
print(desc_stats)
# Descriptive statistics by between-subjects factors
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
# 1. Missing values check
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print("Missing values by TIME and DOMAIN:")
print(missing_summary)
# 2. Outlier detection
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (streamlined)
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across TIME within each DOMAIN:")
print(homogeneity_time)
# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across DOMAIN within each TIME:")
print(homogeneity_domain)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Function to calculate Hartley's F-max ratio
calculate_hartley_ratio <- function(variances) {
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))
print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# More efficient bootstrap function for Hartley's F-max test
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
# Get unique groups and their sample sizes
groups <- unique(data[[group_var]])
# Calculate observed variances for each group
observed_vars <- data %>%
dplyr::group_by(!!rlang::sym(group_var)) %>%
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
dplyr::pull(var)
# Handle invalid variances
if(any(observed_vars <= 0 | is.na(observed_vars))) {
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
}
# Calculate observed F-max ratio
observed_ratio <- max(observed_vars) / min(observed_vars)
# Pre-allocate storage for bootstrap ratios
bootstrap_ratios <- numeric(n_iter)
# Get group data once
group_data_list <- map(groups, ~ {
group_data <- data[data[[group_var]] == .x, response_var]
group_data[!is.na(group_data)]
})
# Bootstrap with pre-allocated storage
for(i in 1:n_iter) {
# Bootstrap sample from each group independently
sample_vars <- map_dbl(group_data_list, ~ {
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
var(bootstrap_sample, na.rm = TRUE)
})
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
}
# Remove invalid ratios
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
if(length(valid_ratios) == 0) {
stop("No valid bootstrap ratios generated")
}
# Calculate critical value (95th percentile)
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
# Return only essential information
return(list(
observed_ratio = observed_ratio,
critical_95 = critical_95,
n_valid_iterations = length(valid_ratios)
))
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Design factors: TIME (", length(levels(long_data_clean$TIME)), "), DOMAIN (",
length(levels(long_data_clean$DOMAIN)), "), TEMPORAL_DO (",
length(levels(long_data_clean$TEMPORAL_DO)), ")", sep = ""))
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(paste("Complete cases:", complete_cases, "out of", nrow(long_data_clean)))
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
# =============================================================================
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
print("ANOVA Results:")
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
# Show Mauchly's test for sphericity
print("\nMauchly's Test of Sphericity:")
print(mixed_anova_model$Mauchly)
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
print(mixed_anova_model$`Sphericity Corrections`)
# Extract and display corrected degrees of freedom
cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
anova_table <- mixed_anova_model$ANOVA
corrected_df <- data.frame(
Effect = sphericity_corr$Effect,
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
GG_epsilon = sphericity_corr$GGe,
HF_epsilon = sphericity_corr$HFe
)
print(corrected_df)
cat("\n=== CORRECTED F-TESTS ===\n")
# Between-subjects effects (no sphericity corrections needed)
cat("\nBETWEEN-SUBJECTS EFFECTS:\n")
between_effects <- c("TEMPORAL_DO")
for(effect in between_effects) {
if(effect %in% anova_table$Effect) {
f_value <- anova_table$F[anova_table$Effect == effect]
dfn <- anova_table$DFn[anova_table$Effect == effect]
dfd <- anova_table$DFd[anova_table$Effect == effect]
p_value <- anova_table$p[anova_table$Effect == effect]
cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
}
}
# Within-subjects effects (sphericity corrections where applicable)
cat("\nWITHIN-SUBJECTS EFFECTS:\n")
# TIME main effect (2 levels, sphericity automatically satisfied)
if("TIME" %in% anova_table$Effect) {
f_value <- anova_table$F[anova_table$Effect == "TIME"]
dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
p_value <- anova_table$p[anova_table$Effect == "TIME"]
cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
}
# DOMAIN main effect (4 levels, needs sphericity correction)
if("DOMAIN" %in% anova_table$Effect) {
f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
}
# Interactions with sphericity corrections
cat("\nINTERACTIONS WITH SPHERICITY CORRECTIONS:\n")
for(i in seq_len(nrow(corrected_df))) {
effect <- corrected_df$Effect[i]
f_value <- anova_table$F[match(effect, anova_table$Effect)]
cat(sprintf("\n%s:\n", effect))
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
}
} else {
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
}
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Extract generalized eta squared from ezANOVA (already calculated)
effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
effect_sizes$ges <- round(effect_sizes$ges, 5)
print("Generalized Eta Squared:")
print(effect_sizes)
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")
# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
data = long_data_clean)
# Main effect of TIME
print("Main Effect of TIME:")
time_emmeans <- emmeans(aov_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of DOMAIN
print("\nMain Effect of DOMAIN:")
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print("Estimated Marginal Means:")
print(domain_emmeans)
print("\nPairwise Contrasts:")
domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
print(domain_contrasts)
# Main effect of TEMPORAL_DO
print("\nMain Effect of TEMPORAL_DO:")
temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
print(temporal_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# TEMPORAL_DO × TIME Interaction
print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
print("Estimated Marginal Means:")
print(temporal_time_emmeans)
print("\nSimple Effects of TIME within each TEMPORAL_DO:")
temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_time_simple)
print("\nSimple Effects of TEMPORAL_DO within each TIME:")
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
print(temporal_time_simple2)
# TIME × DOMAIN Interaction
print("\n=== TIME × DOMAIN INTERACTION ===")
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
print("Estimated Marginal Means:")
print(time_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TIME:")
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
print(time_domain_simple)
print("\nSimple Effects of TIME within each DOMAIN:")
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(time_domain_simple2)
# TEMPORAL_DO × DOMAIN Interaction
print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
print("Estimated Marginal Means:")
print(temporal_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_domain_simple)
print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(temporal_domain_simple2)
# =============================================================================
# THREE-WAY INTERACTION ANALYSIS (SIMPLIFIED)
# =============================================================================
print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
print("Note: Three-way interaction was non-significant (p = 0.511)")
print("Skipping detailed three-way comparisons due to computational intensity")
print("Focus on the significant two-way interactions above.")
# =============================================================================
# COHEN'S D FOR MAIN EFFECTS
# =============================================================================
print("\n=== COHEN'S D FOR MAIN EFFECTS ===")
# Main Effect of TIME (significant: p < 0.001)
print("\n=== COHEN'S D FOR TIME MAIN EFFECT ===")
time_main_contrast <- pairs(time_emmeans, adjust = "none")
time_main_df <- as.data.frame(time_main_contrast)
print("TIME main effect contrast:")
print(time_main_df)
# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
cat("\nCohen's d for TIME main effect:\n")
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
time_cohens_d <- cohen.d(time_past_data, time_future_data)
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
# Main Effect of DOMAIN (significant: p < 0.001)
print("\n=== COHEN'S D FOR DOMAIN MAIN EFFECT ===")
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print("DOMAIN main effect contrasts:")
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
cat("\nCohen's d for significant DOMAIN contrasts:\n")
for(i in seq_len(nrow(significant_domain))) {
contrast_name <- as.character(significant_domain$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
if(length(data1) > 0 && length(data2) > 0) {
domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name))
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
cat("\n")
}
}
}
}
# =============================================================================
# INTERACTION PLOT
# =============================================================================
print("\n=== CREATING INTERACTION PLOT ===")
# Load ggplot2 for plotting (if not already loaded)
library(ggplot2)
# Define color palette for DOMAIN (4 levels)
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")
# Prepare raw data with standard TIME levels
iPlot <- long_data_clean %>%
dplyr::select(pID, DOMAIN, TIME, TEMPORAL_DO, MEAN_DIFFERENCE) %>%
mutate(
DOMAIN = factor(DOMAIN),
TIME = factor(TIME, levels = time_levels),
TEMPORAL_DO = factor(TEMPORAL_DO)
)
# Create estimated marginal means for the interaction plot
emm_full <- emmeans(aov_model, ~ DOMAIN | TIME * TEMPORAL_DO)
# Convert EMMs to data frame and prepare for plotting
emmeans_data2 <- emm_full %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels),
TEMPORAL_DO = factor(TEMPORAL_DO)
)
# Create the interaction plot
interaction_plot2 <- ggplot() +
# Raw data: regular circles, color only
geom_point(
data = iPlot,
aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
alpha = 0.3, shape = 16
) +
geom_rect(
data = emmeans_data2,
aes(
xmin = as.numeric(TIME) - 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25,
xmax = as.numeric(TIME) + 0.15 + (as.numeric(DOMAIN) - 2.5) * 0.25,
ymin = ci_lower, ymax = ci_upper,
fill = DOMAIN
),
color = "black", alpha = 0.5
) +
geom_segment(
data = emmeans_data2,
aes(
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25,
xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25,
y = ci_lower, yend = ci_upper
),
color = "black"
) +
# EMMs: bold points, distinctive by color and shape
geom_point(
data = emmeans_data2,
aes(
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.25,
y = plot_mean,
color = DOMAIN,
shape = DOMAIN
),
size = 1.5, stroke = 0.5, fill = "black"
) +
facet_wrap(~ TEMPORAL_DO, ncol = 2) +
labs(
x = "TIME", y = "Mean Difference",
title = "DOMAIN × TIME Interaction by TEMPORAL_DO", subtitle = ""
) +
scale_color_manual(name = "DOMAIN", values = cbp1) +
scale_fill_manual(name = "DOMAIN", values = cbp1) +
scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 0, hjust = 0.5),
plot.title = element_text(size = 14, hjust = 0.5),
plot.subtitle = element_text(size = 12, hjust = 0.5)
)
print(interaction_plot2)
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d calculations (library already loaded)
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions
if(group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
# =============================================================================
# 1. TEMPORAL_DO × TIME INTERACTION (SIGNIFICANT: p = 0.001)
# =============================================================================
print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
# Get simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# Get simple effects of TEMPORAL_DO within each TIME
temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "MEAN_DIFFERENCE")
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Get simple effects of TIME within each DOMAIN
time_domain_simple2_df <- as.data.frame(time_domain_simple2)
calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "MEAN_DIFFERENCE")
# Get simple effects of DOMAIN within each TIME
time_domain_simple_df <- as.data.frame(time_domain_simple)
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
# =============================================================================
# 3. TEMPORAL_DO × DOMAIN INTERACTION (MARGINAL: p = 0.058)
# =============================================================================
print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
# Get simple effects of TEMPORAL_DO within each DOMAIN
temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "MEAN_DIFFERENCE")
# Get simple effects of DOMAIN within each TEMPORAL_DO
temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# =============================================================================
# 4. THREE-WAY INTERACTION COHEN'S D
# =============================================================================
print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
# Get pairwise comparisons for the three-way interaction
# three_way_contrasts_df <- as.data.frame(three_way_contrasts)
# print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
# print(three_way_contrasts_df)
print("Note: Three-way interaction was non-significant (p = 0.511), so detailed comparisons were not performed.")
print("\n=== ANALYSIS COMPLETE ===")
print("All significant and marginal effects have been analyzed with Cohen's d calculations.")
# Calculate Cohen's d for significant three-way interaction effects
print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
# Extract significant comparisons (p < 0.05)
# significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
if(FALSE) { # Three-way interaction was non-significant, so skip this section
for(i in seq_len(nrow(significant_three_way))) {
comparison <- significant_three_way[i, ]
# Extract the grouping variables
temporal_do_level <- as.character(comparison$TEMPORAL_DO)
domain_level <- as.character(comparison$DOMAIN)
# Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
past_data <- long_data_clean$MEAN_DIFFERENCE[
long_data_clean$TEMPORAL_DO == temporal_do_level &
long_data_clean$DOMAIN == domain_level &
long_data_clean$TIME == "Past"
]
future_data <- long_data_clean$MEAN_DIFFERENCE[
long_data_clean$TEMPORAL_DO == temporal_do_level &
long_data_clean$DOMAIN == domain_level &
long_data_clean$TIME == "Future"
]
if(length(past_data) > 0 && length(future_data) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(past_data, future_data)
cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
cat(sprintf(" Past vs Future comparison\n"))
cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
cat("\n")
}
}
} else {
cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
}