# Source: eohi/eohi2/mixed anova - DGEN.r
# Last modified: 2025-12-23 15:47:09 -05:00
# Mixed ANOVA Analysis for DGEN Variables
# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val,
# DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val,
# DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val,
# X5_10DGEN_past_pref, X5_10DGEN_past_pers, X5_10DGEN_past_val,
# X5_10DGEN_fut_pref, X5_10DGEN_fut_pers, X5_10DGEN_fut_val
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
# Sum-to-zero contrasts are required for the Type III tests requested from
# ezANOVA further below; this option applies globally for the session.
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): machine-specific absolute path; consider a relative path or
# here::here() so the script runs on other machines.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
cat("anova - dgen", getwd(), "\n")
# Read the data (wide format: one row per participant)
data <- read.csv("eohi2.csv")
# Verify the specific variables we need
required_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
                   "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
                   "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
                   "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val",
                   "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
                   "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")
missing_vars <- setdiff(required_vars, colnames(data))
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
}
# Factor coding for each measured variable. The 18 columns are laid out as
# past (5, 10), future (5, 10), then the 5-vs-10 change scores for past and
# future, with the three domains cycling fastest (blocks of 3).
variable_mapping <- data.frame(
  variable = required_vars,
  TIME = rep(c("Past", "Future", "Past", "Future"), times = c(6, 6, 3, 3)),
  DOMAIN = rep(c("Preferences", "Personality", "Values"), times = 6),
  INTERVAL = rep(c("5", "10", "5", "10", "5_10"), times = c(3, 3, 3, 3, 6)),
  stringsAsFactors = FALSE
)
print("Variable mapping:")
print(variable_mapping)
# Reshape the wide per-participant scores into long format (one row per
# participant x measured variable) and attach the TIME/DOMAIN/INTERVAL
# factor coding from variable_mapping.
long_data <- data %>%
  select(ResponseId, TEMPORAL_DO, INTERVAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    all_of(required_vars),
    names_to = "variable",
    values_to = "DGEN_SCORE"
  ) %>%
  left_join(variable_mapping, by = "variable") %>%
  # Drop rows with missing scores; factor levels below are set explicitly,
  # so filtering first cannot change the level sets.
  filter(!is.na(DGEN_SCORE)) %>%
  mutate(
    TIME = factor(TIME, levels = c("Past", "Future")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
    INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
    pID = as.factor(ResponseId), # participant identifier
    temporal_DO = as.factor(TEMPORAL_DO), # between-subjects order factor
    interval_DO = as.factor(INTERVAL_DO) # between-subjects order factor
  ) %>%
  select(pID, ResponseId, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL, DGEN_SCORE)
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME, DOMAIN, and INTERVAL
# (one row per within-subjects cell, collapsed over participants)
desc_stats <- long_data %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
n = n(),
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
print(desc_stats)
# Descriptive statistics by between-subjects factors
# (same summaries, additionally split by the presentation-order factors
# temporal_DO and interval_DO)
desc_stats_by_between <- long_data %>%
group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>%
summarise(
n = n(),
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by between-subjects factors:")
print(desc_stats_by_between)
# Calculate mean differences for key comparisons
# cat() is used for the section header: print() would render the literal
# characters "\n" rather than a newline.
cat("\n=== KEY MEAN DIFFERENCES ===\n")
# Past vs Future differences for each DOMAIN × INTERVAL combination.
# Each pID × DOMAIN × INTERVAL cell should hold exactly one Past and one
# Future score; the trailing [1] yields NA (instead of a summarise() size
# error) if a participant is missing one of the two rows, and the NA is then
# dropped by na.rm = TRUE in the second summarise().
past_future_diffs <- long_data %>%
  group_by(DOMAIN, INTERVAL, pID) %>%
  summarise(
    past_score = DGEN_SCORE[TIME == "Past"][1],
    future_score = DGEN_SCORE[TIME == "Future"][1],
    difference = past_score - future_score,
    .groups = 'drop'
  ) %>%
  group_by(DOMAIN, INTERVAL) %>%
  summarise(
    n = n(),
    mean_diff = round(mean(difference, na.rm = TRUE), 5),
    sd_diff = round(sd(difference, na.rm = TRUE), 5),
    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
    .groups = 'drop'
  )
print("Past vs Future differences by DOMAIN × INTERVAL:")
print(past_future_diffs)
# 5 vs 10 interval differences for each TIME × DOMAIN combination
# (same per-participant difference-then-aggregate pattern as above)
interval_diffs <- long_data %>%
  group_by(TIME, DOMAIN, pID) %>%
  summarise(
    interval_5_score = DGEN_SCORE[INTERVAL == "5"][1],
    interval_10_score = DGEN_SCORE[INTERVAL == "10"][1],
    difference = interval_5_score - interval_10_score,
    .groups = 'drop'
  ) %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean_diff = round(mean(difference, na.rm = TRUE), 5),
    sd_diff = round(sd(difference, na.rm = TRUE), 5),
    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
    .groups = 'drop'
  )
cat("\n5 vs 10 interval differences by TIME × DOMAIN:\n")
print(interval_diffs)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
# NOTE(review): long_data was already filtered with !is.na(DGEN_SCORE) when it
# was built, so this filter is a no-op (long_data_clean == long_data) and the
# missing-value counts below are always 0. To audit true missingness, compute
# it before that upstream filter.
long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
# 1. Missing values check
missing_summary <- long_data %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(DGEN_SCORE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print("Missing values by TIME, DOMAIN, and INTERVAL:")
print(missing_summary)
# 2. Outlier detection (Tukey fences: 1.5 * IQR beyond the quartiles)
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
n = n(),
mean = mean(DGEN_SCORE),
sd = sd(DGEN_SCORE),
q1 = quantile(DGEN_SCORE, 0.25),
q3 = quantile(DGEN_SCORE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
.groups = 'drop'
)
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (per within-subjects cell)
# NOTE(review): ad.test() is evaluated twice per group (once for the
# statistic, once for the p-value); storing one call in a list-column
# would halve the work.
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
.groups = 'drop'
)
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance tests
# leveneTest() is called with a formula inside summarise(); the formula
# resolves DGEN_SCORE and the factor through dplyr's data mask for the
# current group. Each test is evaluated twice per group (F, then p) —
# functional but redundant.
# Test homogeneity across TIME within each DOMAIN × INTERVAL combination
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN, INTERVAL) %>%
summarise(
levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across TIME within each DOMAIN × INTERVAL:")
print(homogeneity_time)
# Test homogeneity across DOMAIN within each TIME × INTERVAL combination
homogeneity_domain <- long_data_clean %>%
group_by(TIME, INTERVAL) %>%
summarise(
levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across DOMAIN within each TIME × INTERVAL:")
print(homogeneity_domain)
# Test homogeneity across INTERVAL within each TIME × DOMAIN combination
homogeneity_interval <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
levene_F = leveneTest(DGEN_SCORE ~ INTERVAL)$`F value`[1],
levene_p = leveneTest(DGEN_SCORE ~ INTERVAL)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across INTERVAL within each TIME × DOMAIN:")
print(homogeneity_interval)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Bootstrap a critical value for Hartley's F-max variance-ratio test.
#
# Arguments:
#   data         - data.frame (or tibble) holding the grouping and response
#                  columns (callers pass the current group via pick())
#   group_var    - string: name of the grouping column
#   response_var - string: name of the numeric response column
#   n_iter       - number of bootstrap iterations (default 1000)
#
# Returns a list with:
#   observed_ratio     - max(group variance) / min(group variance) in the data
#   critical_95        - bootstrap 95th percentile of that ratio under
#                        within-group resampling with replacement
#   n_valid_iterations - iterations that produced a finite ratio
#
# Fix vs the original: values are extracted with [[ ]], because
# data[rows, response_var] on a tibble returns a one-column tibble rather
# than a numeric vector, which broke the downstream sampling/var() logic.
# The base-R implementation also drops the dplyr/purrr dependency.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Per-group response vectors, NA removed; drop = TRUE ignores unused
  # factor levels so empty groups cannot enter the ratio.
  group_values <- split(data[[response_var]], data[[group_var]], drop = TRUE)
  group_values <- lapply(group_values, function(v) v[!is.na(v)])
  # Observed per-group variances; clamp non-positive/NA variances (e.g. a
  # single-observation group) so the ratio is always defined.
  observed_vars <- vapply(group_values, var, numeric(1))
  bad <- observed_vars <= 0 | is.na(observed_vars)
  if (any(bad)) {
    observed_vars[bad] <- 1e-10
  }
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocated bootstrap loop: resample each group independently with
  # replacement and record the variance ratio.
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    sample_vars <- vapply(group_values, function(v) {
      var(sample(v, size = length(v), replace = TRUE))
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Keep only finite ratios (is.finite() is FALSE for NA, NaN, Inf)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated", call. = FALSE)
  }
  # 95th percentile of the bootstrap distribution is the critical value
  list(
    observed_ratio = observed_ratio,
    critical_95 = quantile(valid_ratios, 0.95, na.rm = TRUE),
    n_valid_iterations = length(valid_ratios)
  )
}
# Hartley's F-max test across between-subjects factors within each within-subjects combination
# NOTE(review): print() shows the literal characters "\n" here; cat() would
# be needed for an actual blank line.
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
# Test across temporal_DO within each TIME × DOMAIN × INTERVAL combination
# pick() hands each group's temporal_DO/DGEN_SCORE columns to the bootstrap
# helper; the list-column of results is then unpacked into numeric columns
# and the observed ratio compared against the bootstrap critical value.
print("F-max test across temporal_DO within each TIME × DOMAIN × INTERVAL combination:")
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(temporal_DO, DGEN_SCORE), "temporal_DO", "DGEN_SCORE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# Test across interval_DO within each TIME × DOMAIN × INTERVAL combination
# (same procedure, second between-subjects factor)
print("F-max test across interval_DO within each TIME × DOMAIN × INTERVAL combination:")
hartley_interval_results <- long_data_clean %>%
group_by(TIME, DOMAIN, INTERVAL) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(interval_DO, DGEN_SCORE), "interval_DO", "DGEN_SCORE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, INTERVAL, observed_ratio, critical_95, significant)
print(hartley_interval_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check design completeness: the 4-way table counts observations per
# participant × TIME × DOMAIN × INTERVAL cell. A fully crossed (balanced)
# design has exactly 1 in every cell; a 0 means a participant is missing a
# cell, and > 1 means duplicated rows. The original check accepted counts in
# {0, 1} and therefore reported "balanced" even when cells were missing.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN, long_data_clean$INTERVAL)
if (all(design_balance == 1)) {
  print("Design is balanced: each participant has data for all TIME × DOMAIN × INTERVAL combinations")
} else if (all(design_balance %in% c(0, 1))) {
  print("Warning: Design is unbalanced (some participants are missing TIME × DOMAIN × INTERVAL cells)")
  print(summary(as.vector(design_balance)))
} else {
  print("Warning: Design is unbalanced (duplicate observations in some cells)")
  print(summary(as.vector(design_balance)))
}
# =============================================================================
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
# =============================================================================
print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: temporal_DO (2 levels) × interval_DO (2 levels)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (3 levels: Preferences, Personality, Values) × INTERVAL (3 levels: 5, 10, 5_10)
# type = 3 requests Type III sums of squares, which relies on the
# contr.sum contrasts set at the top of the script.
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = DGEN_SCORE,
wid = pID,
between = .(temporal_DO, interval_DO),
within = .(TIME, DOMAIN, INTERVAL),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
print("Mauchly's Test of Sphericity:")
print(mixed_anova_model$Mauchly)
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
# ezANOVA only emits this element when sphericity is testable (within
# factors with > 2 levels), hence the NULL guard.
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
print(mixed_anova_model$`Sphericity Corrections`)
print("=== CORRECTED DEGREES OF FREEDOM ===")
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
anova_table <- mixed_anova_model$ANOVA
# Corrected df = original df scaled by the epsilon estimate; both numerator
# and denominator df are multiplied by the same epsilon. match() aligns
# rows of the corrections table with rows of the ANOVA table by effect name.
corrected_df <- data.frame(
Effect = sphericity_corr$Effect,
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
GG_epsilon = sphericity_corr$GGe,
HF_epsilon = sphericity_corr$HFe
)
print(corrected_df)
print("=== CORRECTED F-TESTS ===")
# The F statistic itself is unchanged by the corrections; only the df and
# the resulting p-values (p[GG], p[HF]) differ.
corrected_results <- data.frame(
Effect = corrected_df$Effect,
Original_F = anova_table$F[match(corrected_df$Effect, anova_table$Effect)],
Original_DFn = corrected_df$Original_DFn,
Original_DFd = corrected_df$Original_DFd,
GG_DFn = corrected_df$GG_DFn,
GG_DFd = corrected_df$GG_DFd,
HF_DFn = corrected_df$HF_DFn,
HF_DFd = corrected_df$HF_DFd,
GG_p = sphericity_corr$`p[GG]`,
HF_p = sphericity_corr$`p[HF]`
)
print(corrected_results)
}
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
print("=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# ges (generalized eta squared) is reported directly by ezANOVA; subset the
# two relevant columns and round for display.
effect_sizes <- transform(
  mixed_anova_model$ANOVA[c("Effect", "ges")],
  ges = round(ges, 5)
)
print(effect_sizes)
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
print("=== POST-HOC COMPARISONS ===")
# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
# The Error(pID/(TIME * DOMAIN * INTERVAL)) term declares the within-subjects
# error strata. NOTE(review): aov() uses sequential sums of squares, so on
# unbalanced data its cell means may not match the Type III ezANOVA results
# above — verify agreement if the design turns out unbalanced.
aov_model <- aov(DGEN_SCORE ~ temporal_DO * interval_DO * TIME * DOMAIN * INTERVAL + Error(pID/(TIME * DOMAIN * INTERVAL)),
data = long_data_clean)
# Main effects: estimated marginal means plus Bonferroni-adjusted pairwise
# contrasts for each factor in turn.
print("Main Effect of TIME:")
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
print("Main Effect of DOMAIN:")
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
print(domain_contrasts)
print("Main Effect of INTERVAL:")
interval_emmeans <- emmeans(aov_model, ~ INTERVAL)
print(interval_emmeans)
interval_contrasts <- pairs(interval_emmeans, adjust = "bonferroni")
print(interval_contrasts)
print("Main Effect of temporal_DO:")
temporal_emmeans <- emmeans(aov_model, ~ temporal_DO)
temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
print(temporal_contrasts)
print("Main Effect of interval_DO:")
interval_do_emmeans <- emmeans(aov_model, ~ interval_DO)
interval_do_contrasts <- pairs(interval_do_emmeans, adjust = "bonferroni")
print(interval_do_contrasts)
# =============================================================================
# TWO-WAY INTERACTION EXPLORATIONS
# =============================================================================
# For each two-way interaction: estimated marginal means of the cell
# combinations, then simple-effects contrasts conditioned on each factor in
# turn (by = ...), Bonferroni-adjusted within each conditioning level.
# TIME × DOMAIN Interaction
# (time_domain_simple / time_domain_simple2 are reused later for the
# Cohen's d calculations — keep the names stable)
print("=== TIME × DOMAIN INTERACTION ===")
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
print(time_domain_emmeans)
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
print(time_domain_simple)
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(time_domain_simple2)
# TIME × INTERVAL Interaction
print("=== TIME × INTERVAL INTERACTION ===")
time_interval_emmeans <- emmeans(aov_model, ~ TIME * INTERVAL)
print(time_interval_emmeans)
time_interval_simple <- pairs(time_interval_emmeans, by = "TIME", adjust = "bonferroni")
print(time_interval_simple)
time_interval_simple2 <- pairs(time_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
print(time_interval_simple2)
# DOMAIN × INTERVAL Interaction
print("=== DOMAIN × INTERVAL INTERACTION ===")
domain_interval_emmeans <- emmeans(aov_model, ~ DOMAIN * INTERVAL)
print(domain_interval_emmeans)
domain_interval_simple <- pairs(domain_interval_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(domain_interval_simple)
domain_interval_simple2 <- pairs(domain_interval_emmeans, by = "INTERVAL", adjust = "bonferroni")
print(domain_interval_simple2)
# Between-subjects interactions
print("=== temporal_DO × interval_DO INTERACTION ===")
temporal_interval_do_emmeans <- emmeans(aov_model, ~ temporal_DO * interval_DO)
print(temporal_interval_do_emmeans)
temporal_interval_do_simple <- pairs(temporal_interval_do_emmeans, by = "temporal_DO", adjust = "bonferroni")
print(temporal_interval_do_simple)
temporal_interval_do_simple2 <- pairs(temporal_interval_do_emmeans, by = "interval_DO", adjust = "bonferroni")
print(temporal_interval_do_simple2)
# =============================================================================
# THREE-WAY INTERACTION ANALYSES
# =============================================================================
# TIME × DOMAIN × INTERVAL Interaction
# Contrasts of TIME (Past vs Future) within each DOMAIN × INTERVAL cell;
# three_way_contrasts is reused below for the Cohen's d calculations.
print("=== TIME × DOMAIN × INTERVAL INTERACTION ===")
three_way_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN * INTERVAL)
print(three_way_emmeans)
three_way_contrasts <- pairs(three_way_emmeans, by = c("DOMAIN", "INTERVAL"), adjust = "bonferroni")
print(three_way_contrasts)
# Between-subjects × within-subjects interactions
print("=== temporal_DO × TIME INTERACTION ===")
temporal_time_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME)
print(temporal_time_emmeans)
temporal_time_simple <- pairs(temporal_time_emmeans, by = "temporal_DO", adjust = "bonferroni")
print(temporal_time_simple)
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
print(temporal_time_simple2)
# temporal_DO × TIME × INTERVAL three-way interaction
# (temporal_time_interval_simple1 feeds the Cohen's d section below)
print("=== temporal_DO × TIME × INTERVAL INTERACTION ===")
temporal_time_interval_emmeans <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
print(temporal_time_interval_emmeans)
# Simple effects of TIME within each temporal_DO × INTERVAL combination
temporal_time_interval_simple1 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "INTERVAL"), adjust = "bonferroni")
print("Simple effects of TIME within each temporal_DO × INTERVAL:")
print(temporal_time_interval_simple1)
# Simple effects of temporal_DO within each TIME × INTERVAL combination
temporal_time_interval_simple2 <- pairs(temporal_time_interval_emmeans, by = c("TIME", "INTERVAL"), adjust = "bonferroni")
print("Simple effects of temporal_DO within each TIME × INTERVAL:")
print(temporal_time_interval_simple2)
# Simple effects of INTERVAL within each temporal_DO × TIME combination
temporal_time_interval_simple3 <- pairs(temporal_time_interval_emmeans, by = c("temporal_DO", "TIME"), adjust = "bonferroni")
print("Simple effects of INTERVAL within each temporal_DO × TIME:")
print(temporal_time_interval_simple3)
# =============================================================================
# COHEN'S D CALCULATIONS FOR SIGNIFICANT EFFECTS
# =============================================================================
print("=== COHEN'S D FOR SIGNIFICANT EFFECTS ===")
# Function to calculate Cohen's d for pairwise comparisons
#
# Arguments:
#   pairs_df     - data.frame of emmeans pairwise contrasts (needs columns
#                  contrast and p.value; plus a column named group2_var when
#                  the contrasts were conditioned with by = group2_var)
#   data         - long-format raw data to pull scores from
#   group1_var   - string: column in `data` holding the contrasted factor
#   group2_var   - string or NULL: conditioning factor for simple effects
#   response_var - string: column in `data` holding the response
#
# Prints the significant contrasts and a table of Cohen's d values computed
# from the raw (pooled across participants) scores; returns nothing useful.
#
# NOTE(review): contrast labels are split on " - ", which breaks if a factor
# level itself contains that substring; NA p-values would also pass the
# p.value < 0.05 subset as all-NA rows. Neither occurs with the factor
# levels used in this script, but verify before reusing elsewhere.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
print(significant_pairs)
# Calculate Cohen's d for all significant pairs
# (grown row-by-row with rbind; fine for the handful of contrasts here)
cohens_d_results <- data.frame()
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast label "level1 - level2" back into the two levels
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions; when a conditioning factor was used,
# restrict both sides to the current conditioning level
if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(data1, data2)
result_row <- data.frame(
Comparison = contrast_name,
n1 = length(data1),
n2 = length(data2),
Cohens_d = round(cohens_d_result$estimate, 5),
Effect_size = cohens_d_result$magnitude,
p_value = round(comparison$p.value, 5)
)
if(!is.null(group2_var) && group2_var %in% colnames(comparison)) {
result_row$Group_level <- group2_level
}
cohens_d_results <- rbind(cohens_d_results, result_row)
}
}
}
if(nrow(cohens_d_results) > 0) {
print(cohens_d_results)
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
# Calculate Cohen's d for the main effects and the TIME × DOMAIN simple
# effects. Each spec names the emmeans contrast object, the contrasted
# factor, and (for simple effects) the conditioning factor.
cohens_d_specs <- list(
  list(contrasts = time_contrasts, factor = "TIME", by = NULL),
  list(contrasts = domain_contrasts, factor = "DOMAIN", by = NULL),
  list(contrasts = interval_contrasts, factor = "INTERVAL", by = NULL),
  list(contrasts = time_domain_simple, factor = "DOMAIN", by = "TIME"),
  list(contrasts = time_domain_simple2, factor = "TIME", by = "DOMAIN")
)
for (spec in cohens_d_specs) {
  calculate_cohens_d_for_pairs(
    as.data.frame(spec$contrasts), long_data_clean,
    spec$factor, spec$by, "DGEN_SCORE"
  )
}
# Calculate Cohen's d for temporal_DO × TIME × INTERVAL interaction
# (Past vs Future from the raw scores within each temporal_DO × INTERVAL
# cell, for contrasts that were significant in the simple-effects tests)
print("=== COHEN'S D FOR temporal_DO × TIME × INTERVAL INTERACTION ===")
temporal_time_interval_simple1_df <- as.data.frame(temporal_time_interval_simple1)
# which() drops NA p-values instead of letting them create all-NA rows
significant_temporal_time_interval <- temporal_time_interval_simple1_df[which(temporal_time_interval_simple1_df$p.value < 0.05), ]
if (nrow(significant_temporal_time_interval) > 0) {
  # Preallocate row holders and bind once at the end; rbind() inside the
  # loop grows the data.frame quadratically
  row_list <- vector("list", nrow(significant_temporal_time_interval))
  for (i in seq_len(nrow(significant_temporal_time_interval))) {
    comparison <- significant_temporal_time_interval[i, ]
    # The conditioning levels of this simple-effects contrast
    temporal_level <- as.character(comparison$temporal_DO)
    interval_level <- as.character(comparison$INTERVAL)
    in_cell <- long_data_clean$temporal_DO == temporal_level &
      long_data_clean$INTERVAL == interval_level
    past_data <- long_data_clean$DGEN_SCORE[in_cell & long_data_clean$TIME == "Past"]
    future_data <- long_data_clean$DGEN_SCORE[in_cell & long_data_clean$TIME == "Future"]
    if (length(past_data) > 0 && length(future_data) > 0) {
      # Calculate Cohen's d using effsize package
      cohens_d_result <- cohen.d(past_data, future_data)
      row_list[[i]] <- data.frame(
        temporal_DO = temporal_level,
        INTERVAL = interval_level,
        Comparison = "Past vs Future",
        n_Past = length(past_data),
        n_Future = length(future_data),
        Cohens_d = round(cohens_d_result$estimate, 5),
        Effect_size = cohens_d_result$magnitude,
        p_value = round(comparison$p.value, 5),
        Estimated_difference = round(comparison$estimate, 5)
      )
    }
  }
  # NULL entries (skipped comparisons) are dropped by rbind
  temporal_time_interval_cohens_d <- do.call(rbind, row_list)
  if (!is.null(temporal_time_interval_cohens_d) && nrow(temporal_time_interval_cohens_d) > 0) {
    print(temporal_time_interval_cohens_d)
  }
} else {
  cat("No significant TIME effects found within any temporal_DO × INTERVAL combination.\n")
}
# Calculate Cohen's d for three-way interaction
# (Past vs Future from the raw scores within each DOMAIN × INTERVAL cell,
# for contrasts that were significant in the emmeans pairwise tests)
three_way_contrasts_df <- as.data.frame(three_way_contrasts)
# which() drops NA p-values instead of letting them create all-NA rows
significant_three_way <- three_way_contrasts_df[which(three_way_contrasts_df$p.value < 0.05), ]
if (nrow(significant_three_way) > 0) {
  # Preallocate row holders and bind once at the end; rbind() inside the
  # loop grows the data.frame quadratically
  row_list <- vector("list", nrow(significant_three_way))
  for (i in seq_len(nrow(significant_three_way))) {
    comparison <- significant_three_way[i, ]
    # The conditioning levels of this contrast
    domain_level <- as.character(comparison$DOMAIN)
    interval_level <- as.character(comparison$INTERVAL)
    in_cell <- long_data_clean$DOMAIN == domain_level &
      long_data_clean$INTERVAL == interval_level
    past_data <- long_data_clean$DGEN_SCORE[in_cell & long_data_clean$TIME == "Past"]
    future_data <- long_data_clean$DGEN_SCORE[in_cell & long_data_clean$TIME == "Future"]
    if (length(past_data) > 0 && length(future_data) > 0) {
      # Calculate Cohen's d using effsize package
      cohens_d_result <- cohen.d(past_data, future_data)
      row_list[[i]] <- data.frame(
        DOMAIN = domain_level,
        INTERVAL = interval_level,
        Comparison = "Past vs Future",
        n_Past = length(past_data),
        n_Future = length(future_data),
        Cohens_d = round(cohens_d_result$estimate, 5),
        Effect_size = cohens_d_result$magnitude,
        p_value = round(comparison$p.value, 5),
        Estimated_difference = round(comparison$estimate, 5)
      )
    }
  }
  # NULL entries (skipped comparisons) are dropped by rbind
  three_way_cohens_d <- do.call(rbind, row_list)
  if (!is.null(three_way_cohens_d) && nrow(three_way_cohens_d) > 0) {
    print(three_way_cohens_d)
  }
} else {
  cat("No significant TIME effects found within any DOMAIN × INTERVAL combination.\n")
}
# =============================================================================
# COLOR PALETTE FOR PLOTS
# =============================================================================
# Named hex colors keyed by the two TIME levels, used by every plot below
time_colors <- c(Past = "#648FFF", Future = "#DC267F")
# =============================================================================
# INTERACTION PLOT: temporal_DO × TIME × INTERVAL (temporal_DO on x-axis, TIME in legend, INTERVAL as facets)
# =============================================================================
# Create emmeans for temporal_DO × TIME × INTERVAL
emm_temporal_time_interval_plot <- emmeans(aov_model, ~ temporal_DO * TIME * INTERVAL)
# Prepare emmeans data frame for the plot: rename the CI/mean columns and
# compute a manual dodge so Past/Future points sit side by side at each
# temporal_DO position (offset of ±0.1 around the integer x position).
# NOTE(review): the factor levels "01PAST"/"02FUT" are hard-coded — they must
# match the codes in the TEMPORAL_DO column of the CSV, otherwise these rows
# become NA and are silently dropped from the plot.
emmeans_temporal_time_interval_plot <- emm_temporal_time_interval_plot %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
temporal_DO = factor(temporal_DO, levels = c("01PAST", "02FUT")),
TIME = factor(TIME, levels = c("Past", "Future")),
INTERVAL = factor(INTERVAL, levels = c("5", "10", "5_10")),
x_pos = as.numeric(temporal_DO),
time_offset = (as.numeric(TIME) - 1.5) * 0.2,
x_dodged = x_pos + time_offset
)
# Create temporal_DO × TIME × INTERVAL interaction plot with facets
# (95% CI error bars under filled point markers, one facet per INTERVAL)
interaction_plot_temporal_time_interval <- ggplot(emmeans_temporal_time_interval_plot) +
geom_errorbar(
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.1,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
facet_wrap(~INTERVAL, nrow = 1, labeller = labeller(INTERVAL = c(
"5" = "Present v. 5 Years",
"10" = "Present v. 10 Years",
"5_10" = "5 Years v. 10 Years"
))) +
labs(
x = "Order",
y = "Mean absolute deviation",
title = "temporal_DO × TIME × INTERVAL Interaction (Estimated Marginal Means)"
) +
scale_x_continuous(
breaks = c(1, 2),
labels = c("Past First", "Future First"),
limits = c(0.5, 2.5)
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5),
strip.text = element_text(size = 11, face = "bold")
)
print(interaction_plot_temporal_time_interval)
# (Two-way interaction plots removed as requested)
# =============================================================================
# MAIN EFFECT PLOT: TIME (Estimated Marginal Means only)
# =============================================================================
# Create emmeans for TIME main effect
emm_time_main <- emmeans(aov_model, ~ TIME)
# Prepare emmeans data frame: rename CI/mean columns and fix the TIME
# level order so Past is plotted first
emmeans_time_main <- emm_time_main %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TIME = factor(TIME, levels = c("Past", "Future"))
)
# Plot TIME main effect (only emmeans + error bars; same color/shape
# scheme as the interaction plot above)
plot_time_main_effect <- ggplot(emmeans_time_main) +
geom_errorbar(
aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.15,
linewidth = 1,
alpha = 0.9
) +
geom_point(
aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Temporal Direction",
y = "Absolute difference from the present",
title = "Main Effect of TIME (Estimated Marginal Means)"
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(plot_time_main_effect)