# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
#
# Pipeline: read the wide data, pivot the eight domain means to long format,
# run assumption checks and descriptives, fit mixed ANOVAs with ez::ezANOVA,
# then print post-hoc comparisons, effect sizes and a text summary.

# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(nortest)  # For normality tests
library(ggplot2)  # For plotting
library(emmeans)  # For post-hoc comparisons

# -----------------------------------------------------------------------------
# Small helpers used throughout the script
# -----------------------------------------------------------------------------

# Print a section banner: blank line, rule, title, rule.
print_section <- function(title) {
  rule <- strrep("=", 80)
  cat("\n", rule, "\n")
  cat(title, "\n", sep = "")
  cat(rule, "\n")
}

# Return the p-value of `effect` from an ezANOVA table, or NA_real_ when the
# effect is absent (e.g. the fallback model was fitted) or its p is NA.
effect_p <- function(anova_table, effect) {
  p <- anova_table$p[anova_table$Effect == effect]
  if (length(p) == 1 && !is.na(p)) p else NA_real_
}

# TRUE only when `effect` is present with a non-missing p below `alpha`.
is_significant <- function(anova_table, effect, alpha = 0.05) {
  p <- effect_p(anova_table, effect)
  !is.na(p) && p < alpha
}

# Keep only participants (pID) that have data in every observed combination
# of `cell_vars`. ezANOVA cannot fit a within-subjects design when a
# participant is missing a cell, so the inferential samples are built with
# this helper. Unused pID levels are dropped from the result.
keep_complete_participants <- function(df, cell_vars) {
  observed_cells <- unique(df[, cell_vars, drop = FALSE])
  cells_by_id <- unique(df[, c("pID", cell_vars), drop = FALSE])
  counts <- table(cells_by_id$pID)  # NA pIDs are excluded by table()
  complete_ids <- names(counts)[counts == nrow(observed_cells)]
  out <- df[as.character(df$pID) %in% complete_ids, , drop = FALSE]
  out$pID <- factor(out$pID)
  out
}

# Read the data
data <- read.csv("eohi1/exp1.csv")

# Display basic information about the dataset
cat("Dataset dimensions:", dim(data), "\n")
cat("Number of participants:", length(unique(data$pID)), "\n")

# Check experimental conditions
cat("\nExperimental conditions:\n")
print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))

# Check what domain mean columns are available
cat("\nChecking available domain mean columns:\n")
domain_mean_cols <- colnames(data)[
  grepl("mean_(pref|pers|val|life)", colnames(data))
]
print(domain_mean_cols)

# Verify the specific variables we need
required_vars <- c(
  "NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
  "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"
)
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n")
} else {
  cat("All required domain mean variables found!\n")
}

# =============================================================================
# STEP 1: DATA PIVOTING TO LONG FORMAT
# =============================================================================

cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n")

# Define domain mapping: one row per wide column, with its TIME and DOMAIN level
domain_mapping <- data.frame(
  variable = c(
    "NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
    "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"
  ),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)

cat("Domain mapping:\n")
print(domain_mapping)

# Pivot the wide domain-mean columns to long format.
#
# Arguments:
#   data            Wide data frame. Must contain pID, ResponseId, GROUP,
#                   TEMPORAL_DO and ITEM_DO plus the columns named in
#                   domain_mapping$variable.
#   domain_mapping  Data frame with columns `variable`, `time` and `domain`.
#
# Returns a data frame with one row per participant row x mapped variable,
# holding the identifier columns, TIME, DOMAIN and MEAN_DIFFERENCE. TIME,
# DOMAIN, pID, GROUP, TEMPORAL_DO and ITEM_DO are returned as factors.
#
# Mapped variables that are absent from `data` are skipped with a printed
# warning. An error is raised when an identifier column is missing or when
# none of the mapped variables exist.
pivot_domain_means <- function(data, domain_mapping) {
  id_cols <- c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO")
  absent_ids <- setdiff(id_cols, colnames(data))
  if (length(absent_ids) > 0) {
    stop(
      "Missing identifier column(s): ", paste(absent_ids, collapse = ", "),
      call. = FALSE
    )
  }

  # Build one long piece per mapped variable, then bind once at the end
  # instead of growing a data frame inside a loop.
  pieces <- lapply(seq_len(nrow(domain_mapping)), function(i) {
    var_name <- domain_mapping$variable[i]
    if (!var_name %in% colnames(data)) {
      cat("Warning: Variable", var_name, "not found in data\n")
      return(NULL)
    }
    piece <- data[, id_cols, drop = FALSE]
    piece$TIME <- domain_mapping$time[i]
    piece$DOMAIN <- domain_mapping$domain[i]
    piece$MEAN_DIFFERENCE <- data[[var_name]]
    piece
  })
  pieces <- Filter(Negate(is.null), pieces)
  if (length(pieces) == 0) {
    stop("None of the mapped variables were found in data", call. = FALSE)
  }
  long_data <- do.call(rbind, pieces)

  # Convert to factors with proper levels
  long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future"))
  long_data$DOMAIN <- factor(
    long_data$DOMAIN,
    levels = c("Preferences", "Personality", "Values", "Life")
  )
  long_data$pID <- as.factor(long_data$pID)
  long_data$GROUP <- as.factor(long_data$GROUP)
  long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO)
  long_data$ITEM_DO <- as.factor(long_data$ITEM_DO)

  long_data
}

# Pivot data to long format
cat("\nPivoting data to long format...\n")

long_data <- tryCatch(
  pivot_domain_means(data, domain_mapping),
  error = function(e) {
    cat("Error in data pivoting:", conditionMessage(e), "\n")
    stop("Cannot proceed without proper data structure", call. = FALSE)
  }
)
cat("Data pivoting completed successfully.\n")

cat("Long format data dimensions:", dim(long_data), "\n")
cat("Unique participants:", length(unique(long_data$pID)), "\n")
cat("TIME levels:", paste(levels(long_data$TIME), collapse = ", "), "\n")
cat("DOMAIN levels:", paste(levels(long_data$DOMAIN), collapse = ", "), "\n")

# Check data types
cat("\nData types check:\n")
cat("TIME is factor:", is.factor(long_data$TIME), "\n")
cat("DOMAIN is factor:", is.factor(long_data$DOMAIN), "\n")
cat("pID is factor:", is.factor(long_data$pID), "\n")
cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n")

# Display structure and sample
cat("\nLong data structure:\n")
str(long_data)

cat("\nFirst 10 rows of long_data:\n")
print(utils::head(long_data, 10))

# Show example data for one participant.
# which() keeps rows with a missing pID from appearing as all-NA rows.
cat("\nExample: Participant 1 across all domains and times:\n")
participant_1_data <- long_data[
  which(long_data$pID == 1),
  c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN",
    "MEAN_DIFFERENCE")
]
print(participant_1_data)

# =============================================================================
# STEP 2: ASSUMPTION CHECKING
# =============================================================================

# 2.1 Check for missing values
cat("\n2.1 Missing Values Check:\n")
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = "drop"
  )

cat("Missing values by TIME and DOMAIN:\n")
print(missing_summary)

# Remove missing values (row-wise). Used for descriptives and assumption checks.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
cat("\nData after removing missing values:", dim(long_data_clean), "\n")

# Row-wise NA removal can leave a participant with only some TIME x DOMAIN
# cells, which ezANOVA cannot handle. The ANOVAs and post-hoc tests therefore
# use only participants observed in every cell.
anova_data <- keep_complete_participants(long_data_clean, c("TIME", "DOMAIN"))
n_incomplete <- length(unique(long_data_clean$pID)) -
  length(unique(anova_data$pID))
cat("Participants with complete TIME x DOMAIN data (used for ANOVA):",
    length(unique(anova_data$pID)), "\n")
cat("Participants excluded from ANOVA for incomplete cells:",
    n_incomplete, "\n")

# 2.2 Outlier detection
cat("\n2.2 Outlier Detection:\n")
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(
      MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound
    ),
    .groups = "drop"
  )

cat("Outlier summary (IQR method):\n")
print(outlier_summary)

# 2.3 Normality tests
# The sample-size guards match the scalar conditions the tests are wrapped in:
# Shapiro-Wilk for 3..5000 observations, Anderson-Darling for at least 7.
cat("\n2.3 Normality Tests:\n")
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    shapiro_p = if (n >= 3 && n <= 5000) {
      shapiro.test(MEAN_DIFFERENCE)$p.value
    } else {
      NA_real_
    },
    anderson_p = if (n >= 7) {
      ad.test(MEAN_DIFFERENCE)$p.value
    } else {
      NA_real_
    },
    .groups = "drop"
  ) %>%
  mutate(
    shapiro_normal = shapiro_p > 0.05,
    anderson_normal = anderson_p > 0.05,
    overall_normal = case_when(
      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
      !is.na(shapiro_p) ~ shapiro_normal,
      !is.na(anderson_p) ~ anderson_normal,
      TRUE ~ NA
    )
  )

cat("Normality test results:\n")
print(normality_results)

# 2.4 Homogeneity of variance (Levene's test)
# NOTE(review): TIME and DOMAIN are within-subjects factors, so the groups
# compared here contain the same participants and are not independent;
# treat these p-values as descriptive only -- TODO confirm intended use.
cat("\n2.4 Homogeneity of Variance Tests:\n")

# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    homogeneous = levene_p > 0.05,
    .groups = "drop"
  )

cat("Homogeneity of variance across TIME within each DOMAIN:\n")
print(homogeneity_time)

# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
    homogeneous = levene_p > 0.05,
    .groups = "drop"
  )

cat("Homogeneity of variance across DOMAIN within each TIME:\n")
print(homogeneity_domain)

# =============================================================================
# STEP 3: DESCRIPTIVE STATISTICS
# =============================================================================

print_section("STEP 3: DESCRIPTIVE STATISTICS")

# Overall descriptive statistics
desc_stats <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE), 5),
    sd = round(sd(MEAN_DIFFERENCE), 5),
    median = round(median(MEAN_DIFFERENCE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75), 5),
    min = round(min(MEAN_DIFFERENCE), 5),
    max = round(max(MEAN_DIFFERENCE), 5),
    .groups = "drop"
  )

cat("Descriptive statistics by TIME and DOMAIN:\n")
print(desc_stats)

# Descriptive statistics by between-subjects factors
desc_stats_by_group <- long_data_clean %>%
  group_by(GROUP, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE), 5),
    sd = round(sd(MEAN_DIFFERENCE), 5),
    .groups = "drop"
  )

cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n")
print(desc_stats_by_group)

# =============================================================================
# STEP 4: MIXED ANOVA ANALYSES
# =============================================================================

print_section("STEP 4: MIXED ANOVA ANALYSES")

# 4.1 Main Mixed ANOVA
cat("\n4.1 Main Mixed ANOVA:\n")
cat("Within-subjects factors: TIME, DOMAIN\n")
cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n")
cat(strrep("-", 50), "\n")

# tryCatch() returns the fitted model, so no global assignment is needed from
# inside the handlers. main_anova is NULL when both models fail.
main_anova <- tryCatch(
  {
    fit <- ezANOVA(
      data = anova_data,
      dv = MEAN_DIFFERENCE,
      wid = pID,
      within = c(TIME, DOMAIN),
      between = c(GROUP, TEMPORAL_DO, ITEM_DO),
      type = 3,
      detailed = TRUE,
      return_aov = TRUE
    )
    cat("Main ANOVA Results:\n")
    print(fit)
    fit
  },
  error = function(e) {
    cat("Error in main ANOVA:", conditionMessage(e), "\n")
    # Try simpler model without all between-subjects factors
    cat("Attempting simpler model with only GROUP as between-subjects factor...\n")
    tryCatch(
      {
        fit <- ezANOVA(
          data = anova_data,
          dv = MEAN_DIFFERENCE,
          wid = pID,
          within = c(TIME, DOMAIN),
          between = GROUP,
          type = 3,
          detailed = TRUE,
          return_aov = TRUE
        )
        cat("Simplified ANOVA Results:\n")
        print(fit)
        fit
      },
      error = function(e2) {
        cat("Simplified ANOVA also failed:", conditionMessage(e2), "\n")
        NULL
      }
    )
  }
)

# Check sphericity
if (!is.null(main_anova) &&
      !is.null(main_anova$`Mauchly's Test for Sphericity`)) {
  cat("\nSphericity test results:\n")
  print(main_anova$`Mauchly's Test for Sphericity`)
}

# 4.2 Domain-specific analyses
cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
cat(strrep("-", 50), "\n")

domain_results <- list()
for (domain in levels(long_data_clean$DOMAIN)) {
  cat("\nAnalyzing domain:", domain, "\n")

  # Only the TIME cells of this domain need to be complete here.
  domain_data <- keep_complete_participants(
    long_data_clean[long_data_clean$DOMAIN == domain, ],
    "TIME"
  )

  domain_fit <- tryCatch(
    {
      fit <- ezANOVA(
        data = domain_data,
        dv = MEAN_DIFFERENCE,
        wid = pID,
        within = TIME,
        between = c(GROUP, TEMPORAL_DO, ITEM_DO),
        type = 3,
        detailed = TRUE
      )
      cat("ANOVA results for", domain, ":\n")
      print(fit)
      fit
    },
    error = function(e) {
      cat("Error in ANOVA for", domain, ":", conditionMessage(e), "\n")
      # Fallback to simpler analysis
      cat("Attempting simpler repeated measures ANOVA...\n")
      tryCatch(
        {
          fit <- ezANOVA(
            data = domain_data,
            dv = MEAN_DIFFERENCE,
            wid = pID,
            within = TIME,
            between = GROUP,
            type = 3,
            detailed = TRUE
          )
          print(fit)
          fit
        },
        error = function(e2) {
          cat("Simple ANOVA also failed:", conditionMessage(e2), "\n")
          NULL
        }
      )
    }
  )

  # Store here rather than inside the handler: an assignment inside
  # function(e) would only modify a copy local to the handler. Assigning NULL
  # leaves the list unchanged, so failed domains are simply absent.
  domain_results[[domain]] <- domain_fit
}

# 4.3 Time-specific analyses
cat("\n4.3 Time-specific Mixed ANOVAs:\n")
cat(strrep("-", 50), "\n")

time_results <- list()
for (time in levels(long_data_clean$TIME)) {
  cat("\nAnalyzing time:", time, "\n")

  # Only the DOMAIN cells of this time level need to be complete here.
  time_data <- keep_complete_participants(
    long_data_clean[long_data_clean$TIME == time, ],
    "DOMAIN"
  )

  time_fit <- tryCatch(
    {
      fit <- ezANOVA(
        data = time_data,
        dv = MEAN_DIFFERENCE,
        wid = pID,
        within = DOMAIN,
        between = c(GROUP, TEMPORAL_DO, ITEM_DO),
        type = 3,
        detailed = TRUE
      )
      cat("ANOVA results for", time, ":\n")
      print(fit)
      fit
    },
    error = function(e) {
      cat("Error in ANOVA for", time, ":", conditionMessage(e), "\n")
      # Fallback to simpler analysis
      cat("Attempting simpler repeated measures ANOVA...\n")
      tryCatch(
        {
          fit <- ezANOVA(
            data = time_data,
            dv = MEAN_DIFFERENCE,
            wid = pID,
            within = DOMAIN,
            between = GROUP,
            type = 3,
            detailed = TRUE
          )
          print(fit)
          fit
        },
        error = function(e2) {
          cat("Simple ANOVA also failed:", conditionMessage(e2), "\n")
          NULL
        }
      )
    }
  )

  # Same reasoning as for domain_results: assign in the loop body.
  time_results[[time]] <- time_fit
}

# =============================================================================
# STEP 5: POST-HOC ANALYSES
# =============================================================================

print_section("STEP 5: POST-HOC ANALYSES")

# 5.1 Pairwise comparisons for significant effects
if (!is.null(main_anova)) {
  cat("\n5.1 Post-hoc comparisons for main effects:\n")

  # Check for significant main effects and interactions
  anova_table <- main_anova$ANOVA

  if (is_significant(anova_table, "TIME")) {
    cat("Significant TIME main effect found. Computing pairwise comparisons...\n")

    # Paired t-test on participant-level means. tapply() returns a
    # pID x TIME matrix, so Past and Future values are aligned by participant
    # and each participant contributes exactly one pair.
    time_means <- tapply(
      anova_data$MEAN_DIFFERENCE,
      list(anova_data$pID, anova_data$TIME),
      mean
    )
    paired_ok <- complete.cases(time_means[, c("Past", "Future"), drop = FALSE])

    if (sum(paired_ok) >= 2) {
      time_t_test <- t.test(
        time_means[paired_ok, "Past"],
        time_means[paired_ok, "Future"],
        paired = TRUE
      )
      cat("Paired t-test for TIME effect:\n")
      cat("t =", round(time_t_test$statistic, 5),
          ", df =", time_t_test$parameter,
          ", p =", round(time_t_test$p.value, 5), "\n")
      cat("Mean difference (Past - Future):",
          round(time_t_test$estimate, 5), "\n")
    } else {
      cat("Not enough complete Past/Future pairs for a paired t-test.\n")
    }
  }

  if (is_significant(anova_table, "DOMAIN")) {
    cat("Significant DOMAIN main effect found.\n")

    domain_means <- anova_data %>%
      group_by(DOMAIN) %>%
      summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = "drop")

    cat("Domain means:\n")
    print(domain_means)

    # Pairwise comparisons between domains on participant-level means.
    # as.vector() unrolls the pID x DOMAIN matrix column by column, so every
    # domain block lists participants in the same order, as required for
    # paired comparisons.
    domain_by_id <- tapply(
      anova_data$MEAN_DIFFERENCE,
      list(anova_data$pID, anova_data$DOMAIN),
      mean
    )
    domain_pairwise <- tryCatch(
      pairwise.t.test(
        x = as.vector(domain_by_id),
        g = factor(
          rep(colnames(domain_by_id), each = nrow(domain_by_id)),
          levels = colnames(domain_by_id)
        ),
        paired = TRUE,
        p.adjust.method = "holm"
      ),
      error = function(e) {
        cat("Pairwise DOMAIN comparisons failed:", conditionMessage(e), "\n")
        NULL
      }
    )
    if (!is.null(domain_pairwise)) {
      cat("Pairwise DOMAIN comparisons (paired t-tests, Holm-adjusted p):\n")
      print(domain_pairwise$p.value)
    }
  }

  if (is_significant(anova_table, "TIME:DOMAIN")) {
    cat("Significant TIME × DOMAIN interaction found.\n")

    # Cell means for the interaction
    interaction_means <- anova_data %>%
      group_by(TIME, DOMAIN) %>%
      summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = "drop")

    cat("TIME × DOMAIN interaction means:\n")
    print(interaction_means)
  }
}

# =============================================================================
# STEP 6: EFFECT SIZES
# =============================================================================

print_section("STEP 6: EFFECT SIZES")

if (!is.null(main_anova)) {
  anova_table <- main_anova$ANOVA

  # Calculate partial eta squared for each effect from the sums of squares
  # that ezANOVA reports when detailed = TRUE.
  anova_table$partial_eta_squared <- round(
    anova_table$SSn / (anova_table$SSn + anova_table$SSd), 5
  )

  cat("Effect sizes (partial eta squared):\n")
  effect_sizes <- anova_table[, c("Effect", "partial_eta_squared")]
  print(effect_sizes)
}

# =============================================================================
# STEP 7: SUMMARY AND INTERPRETATION
# =============================================================================

print_section("STEP 7: SUMMARY AND INTERPRETATION")

cat("Analysis Summary:\n")
cat("- Total participants:", length(unique(long_data_clean$pID)), "\n")
cat("- Total observations:", nrow(long_data_clean), "\n")
cat("- Participants in main ANOVA (complete cells):",
    length(unique(anova_data$pID)), "\n")
cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n")
cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n")
cat("- Dependent variable: Mean absolute differences in domain ratings\n")

cat("\nResearch Question:\n")
cat("Do participants rate changes in domains differently from past to now vs past to future?\n")

if (!is.null(main_anova)) {
  anova_table <- main_anova$ANOVA

  cat("\nKey Findings:\n")

  # Check for significant effects. which() drops rows whose p is NA.
  significant_effects <- anova_table$Effect[which(anova_table$p < 0.05)]
  if (length(significant_effects) > 0) {
    cat("Significant effects found:\n")
    for (effect in significant_effects) {
      p_val <- anova_table$p[anova_table$Effect == effect]
      cat("-", effect, "(p =", round(p_val, 5), ")\n")
    }
  } else {
    cat("No significant effects found at α = 0.05\n")
  }

  # Interpret TIME effect
  time_p <- effect_p(anova_table, "TIME")
  if (!is.na(time_p)) {
    if (time_p < 0.05) {
      cat("\nTIME Effect: Participants show different levels of change when comparing\n")
      cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n")
    } else {
      cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n")
      cat("perspectives (p =", round(time_p, 5), ")\n")
    }
  }

  # Interpret DOMAIN effect
  domain_p <- effect_p(anova_table, "DOMAIN")
  if (!is.na(domain_p)) {
    if (domain_p < 0.05) {
      cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n")
      cat("(p =", round(domain_p, 5), ")\n")
    } else {
      cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n")
      cat("(p =", round(domain_p, 5), ")\n")
    }
  }

  # Interpret interaction
  interaction_p <- effect_p(anova_table, "TIME:DOMAIN")
  if (!is.na(interaction_p)) {
    if (interaction_p < 0.05) {
      cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n")
      cat("varies across domains (p =", round(interaction_p, 5), ")\n")
    } else {
      cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n")
      cat("and domain (p =", round(interaction_p, 5), ")\n")
    }
  }
}

print_section("ANALYSIS COMPLETE!")