# Mixed ANOVA Analysis for Past vs Future Differences
# EOHI Experiment Data Analysis
#
# Pipeline:
#   1. Reshape the wide per-domain difference columns to long format.
#   2. Check assumptions (missingness, outliers, normality, variance).
#   3. Descriptive statistics per domain item and time perspective.
#   4. Per-domain past-vs-future comparison (builds `results_list`).
#   5. Summary table and plots.

# Load required libraries ----
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(ggplot2)

# Helpers ----

# Print a section banner: a blank line, an 80-character rule, the title, and
# a closing rule. `strrep()` is used because R cannot multiply a string.
print_banner <- function(title) {
  rule <- strrep("=", 80)
  cat("\n", rule, "\n")
  cat(title, "\n", sep = "")
  cat(rule, "\n")
}

# Run a normality test and return its p-value, or NA when the sample size is
# outside [min_n, max_n] or the test itself fails (e.g. all values identical).
# A failure is reported as a warning rather than aborting the whole summary.
normality_p_value <- function(test_fun, x, min_n, max_n = Inf) {
  n_obs <- length(x)
  if (n_obs < min_n || n_obs > max_n) {
    return(NA_real_)
  }
  tryCatch(
    test_fun(x)$p.value,
    error = function(e) {
      warning("Normality test failed: ", conditionMessage(e), call. = FALSE)
      NA_real_
    }
  )
}

# Levene's test p-value for `y` across the levels of `group`; NA when fewer
# than two groups are present or the test fails.
levene_p_value <- function(y, group) {
  group <- droplevels(as.factor(group))
  if (nlevels(group) < 2) {
    return(NA_real_)
  }
  tryCatch(
    leveneTest(y, group)$`Pr(>F)`[1],
    error = function(e) {
      warning("Levene's test failed: ", conditionMessage(e), call. = FALSE)
      NA_real_
    }
  )
}

# Decide whether a per-domain result is significant at p < 0.05. Prefers an
# ezANOVA-style table (`result$anova$ANOVA`) when present, otherwise falls
# back to `result$t_test`. Anything missing or NA counts as not significant.
is_significant <- function(result) {
  if (!is.null(result$anova) && !is.null(result$anova$ANOVA)) {
    anova_table <- result$anova$ANOVA
    if (!("TimePerspective" %in% anova_table$Effect)) {
      return(FALSE)
    }
    p_val <- anova_table$p[anova_table$Effect == "TimePerspective"]
    return(isTRUE(p_val[1] < 0.05))
  }
  if (!is.null(result$t_test)) {
    return(isTRUE(result$t_test$p.value < 0.05))
  }
  FALSE
}

# Read the data ----
data <- read.csv("eohi1/exp1.csv")

# Display basic information about the dataset
cat("Dataset dimensions:", dim(data), "\n")
cat("Number of participants:", length(unique(data$pID)), "\n")

# Check experimental conditions (explicit print so it shows under source())
cat("\nExperimental conditions:\n")
print(table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO))

# STEP 1: PROPER DATA RESHAPING ----

# Define domains with their categories
domain_info <- data.frame(
  domain = c(
    "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
    "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious",
    "pers_complex",
    "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
    "life_ideal", "life_excellent", "life_satisfied", "life_important",
    "life_change"
  ),
  domain_type = c(
    rep("Preferences", 5),
    rep("Personality", 5),
    rep("Values", 5),
    rep("Life_Satisfaction", 5)
  ),
  stringsAsFactors = FALSE
)

# Reshape ALL domains at once into long format.
#
# For every row of `domain_info`, the columns `NPastDiff_<domain>` and
# `NFutDiff_<domain>` of `data` are stacked into a single `Difference`
# column, labelled by `TimePerspective` ("Past"/"Future"), `Domain_Type` and
# `Domain_Item`. Domains whose columns are absent are reported and skipped.
#
# data:        wide data frame containing the identifier/covariate columns
#              listed in `id_cols` plus the per-domain difference columns.
# domain_info: data frame with character columns `domain` and `domain_type`.
#
# Returns a long data frame with `TimePerspective`, `Domain_Type`,
# `Domain_Item` and `pID` converted to factors. Stops if no domain columns
# are found at all.
reshape_all_domains <- function(data, domain_info) {
  id_cols <- c(
    "pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO",
    "COC_DO", "demo_sex", "demo_age_1", "AOT_total", "CRT_correct"
  )

  # Collect one long frame per domain and bind once at the end instead of
  # growing a data frame with rbind() on every iteration.
  domain_frames <- vector("list", nrow(domain_info))

  for (i in seq_len(nrow(domain_info))) {
    domain_name <- domain_info$domain[i]
    domain_type <- domain_info$domain_type[i]

    past_col <- paste0("NPastDiff_", domain_name)
    fut_col <- paste0("NFutDiff_", domain_name)

    # Check if columns exist
    if (!(past_col %in% colnames(data)) || !(fut_col %in% colnames(data))) {
      cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
      next
    }

    shared_cols <- data %>% select(all_of(id_cols))

    past_data <- shared_cols %>%
      mutate(
        TimePerspective = "Past",
        Difference = data[[past_col]],
        Domain_Type = domain_type, # e.g., "Preferences"
        Domain_Item = domain_name # e.g., "pref_read"
      )

    fut_data <- shared_cols %>%
      mutate(
        TimePerspective = "Future",
        Difference = data[[fut_col]],
        Domain_Type = domain_type,
        Domain_Item = domain_name
      )

    domain_frames[[i]] <- rbind(past_data, fut_data)
  }

  domain_frames <- Filter(Negate(is.null), domain_frames)
  if (length(domain_frames) == 0) {
    stop(
      "No NPastDiff_*/NFutDiff_* columns from `domain_info` found in `data`.",
      call. = FALSE
    )
  }

  # Convert to factors
  bind_rows(domain_frames) %>%
    mutate(
      TimePerspective = as.factor(TimePerspective),
      Domain_Type = as.factor(Domain_Type),
      Domain_Item = as.factor(Domain_Item),
      pID = as.factor(pID)
    )
}

# Reshape all data to long format
cat("\nReshaping data to long format...\n")
long_data <- reshape_all_domains(data, domain_info)

cat("Long format data dimensions:", dim(long_data), "\n")
cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n")
# as.character(): cat() on a factor would print its integer codes
cat("Domain types:", as.character(unique(long_data$Domain_Type)), "\n")

# STEP 2: ASSUMPTION CHECKING ----
print_banner("STEP 2: CHECKING ASSUMPTIONS")

# 2.1 Check for missing values
missing_summary <- long_data %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(Difference)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = "drop"
  )

cat("\nMissing values by domain and time perspective:\n")
print(missing_summary)

# Remove missing values
long_data_clean <- long_data[!is.na(long_data$Difference), ]
cat("\nData after removing missing values:", dim(long_data_clean), "\n")

# 2.2 Outlier detection
cat("\nChecking for outliers...\n")
outlier_summary <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    mean = mean(Difference),
    sd = sd(Difference),
    q1 = quantile(Difference, 0.25),
    q3 = quantile(Difference, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(Difference < lower_bound | Difference > upper_bound),
    .groups = "drop"
  )

cat("Outlier summary (IQR method):\n")
print(outlier_summary)

# 2.3 Normality tests
# shapiro.test() accepts 3..5000 observations; nortest::ad.test() requires a
# sample size greater than 7, so its lower bound is 8.
cat("\nTesting normality...\n")
normality_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    shapiro_p = normality_p_value(
      shapiro.test, Difference,
      min_n = 3, max_n = 5000
    ),
    anderson_p = normality_p_value(ad.test, Difference, min_n = 8),
    .groups = "drop"
  ) %>%
  mutate(
    shapiro_normal = shapiro_p > 0.05,
    anderson_normal = anderson_p > 0.05,
    overall_normal = case_when(
      !is.na(shapiro_p) & !is.na(anderson_p) ~
        shapiro_normal & anderson_normal,
      !is.na(shapiro_p) ~ shapiro_normal,
      !is.na(anderson_p) ~ anderson_normal,
      TRUE ~ NA
    )
  )

cat("Normality test results:\n")
print(normality_results)

# 2.4 Homogeneity of variance (Levene's test)
cat("\nTesting homogeneity of variance...\n")
homogeneity_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item) %>%
  summarise(
    levene_p = levene_p_value(Difference, TimePerspective),
    homogeneous = levene_p > 0.05,
    .groups = "drop"
  )

cat("Homogeneity of variance results:\n")
print(homogeneity_results)

# STEP 3: DESCRIPTIVE STATISTICS ----
print_banner("STEP 3: DESCRIPTIVE STATISTICS")

desc_stats <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    mean = mean(Difference),
    sd = sd(Difference),
    median = median(Difference),
    q1 = quantile(Difference, 0.25),
    q3 = quantile(Difference, 0.75),
    min = min(Difference),
    max = max(Difference),
    .groups = "drop"
  )

cat("Descriptive statistics:\n")
print(desc_stats)

# STEP 4: PER-DOMAIN PAST VS FUTURE COMPARISON ----
# The summary below reads `results_list`, which was never created. It is
# built here, one entry per domain item, with the fields the summary uses:
#   descriptive: n / mean / sd per TimePerspective (non-missing values)
#   cohens_d:    paired effect size, mean(past - future) / sd(past - future),
#                so a positive value means Past > Future
#   t_test:      one-sample t-test of (past - future) against 0, which is
#                the paired t-test; NULL when it cannot be computed
# NOTE(review): assumes each row of `data` is one participant, so the past
# and future columns are paired row-wise, and that a paired d_z is the
# intended effect size -- confirm against the analysis plan.
analyse_domain <- function(domain_name, wide_data, long_clean) {
  descriptive <- long_clean %>%
    filter(Domain_Item == domain_name) %>%
    group_by(TimePerspective) %>%
    summarise(
      n = n(),
      mean = mean(Difference),
      sd = sd(Difference),
      .groups = "drop"
    )

  paired_diff <- wide_data[[paste0("NPastDiff_", domain_name)]] -
    wide_data[[paste0("NFutDiff_", domain_name)]]
  paired_diff <- paired_diff[!is.na(paired_diff)]

  # Need at least two complete pairs with non-constant differences
  diff_sd <- if (length(paired_diff) >= 2) sd(paired_diff) else NA_real_
  testable <- !is.na(diff_sd) && diff_sd > 0

  list(
    descriptive = descriptive,
    cohens_d = if (testable) mean(paired_diff) / diff_sd else NA_real_,
    t_test = if (testable) t.test(paired_diff, mu = 0) else NULL
  )
}

analysed_domains <- as.character(unique(long_data_clean$Domain_Item))
results_list <- setNames(
  lapply(
    analysed_domains,
    analyse_domain,
    wide_data = data,
    long_clean = long_data_clean
  ),
  analysed_domains
)

# Summary of all results ----
print_banner("SUMMARY OF ALL DOMAINS")

# Zero-row template keeps the column types stable even with no results
summary_template <- data.frame(
  Domain = character(),
  Past_Mean = numeric(),
  Future_Mean = numeric(),
  Cohen_d = numeric(),
  Significant = logical(),
  stringsAsFactors = FALSE
)

summary_rows <- lapply(names(results_list), function(domain) {
  result <- results_list[[domain]]
  by_time <- result$descriptive

  past_mean <- by_time$mean[by_time$TimePerspective == "Past"]
  fut_mean <- by_time$mean[by_time$TimePerspective == "Future"]

  # Skip domains that lack data for one of the two time perspectives
  if (length(past_mean) != 1 || length(fut_mean) != 1) {
    return(NULL)
  }

  data.frame(
    Domain = domain,
    Past_Mean = round(past_mean, 3),
    Future_Mean = round(fut_mean, 3),
    Cohen_d = round(result$cohens_d, 5),
    Significant = is_significant(result), # p < 0.05
    stringsAsFactors = FALSE
  )
})
summary_rows <- Filter(Negate(is.null), summary_rows)
summary_df <- do.call(rbind, c(list(summary_template), summary_rows))

# Sort by effect size (absolute value)
summary_df <- summary_df[order(abs(summary_df$Cohen_d), decreasing = TRUE), ]
print(summary_df)

# Create visualization ----

# Prepare data for plotting
plot_data <- summary_df %>%
  mutate(
    Effect_Size = abs(Cohen_d),
    Direction = ifelse(Cohen_d > 0, "Past > Future", "Future > Past"),
    Domain_Type = case_when(
      grepl("pref_", Domain) ~ "Preferences",
      grepl("pers_", Domain) ~ "Personality",
      grepl("val_", Domain) ~ "Values",
      grepl("life_", Domain) ~ "Life Satisfaction",
      TRUE ~ "Other"
    )
  )

# Effect size plot
# Alpha values are named so TRUE is always opaque, even when every (or no)
# domain is significant and only one level is present.
p1 <- ggplot(
  plot_data,
  aes(
    x = reorder(Domain, Effect_Size),
    y = Effect_Size,
    fill = Direction,
    alpha = Significant
  )
) +
  geom_col() +
  coord_flip() +
  scale_alpha_manual(
    values = c("FALSE" = 0.5, "TRUE" = 1),
    name = "Significant\n(p < 0.05)"
  ) +
  scale_fill_manual(
    values = c("Past > Future" = "#E74C3C", "Future > Past" = "#3498DB")
  ) +
  labs(
    title = "Effect Sizes: Past vs Future Differences",
    subtitle = "Absolute Cohen's d values across domains",
    x = "Domain",
    y = "|Cohen's d|",
    fill = "Direction"
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 8))

print(p1)

# Mean differences plot
plot_data_long <- summary_df %>%
  select(Domain, Past_Mean, Future_Mean) %>%
  pivot_longer(
    cols = c(Past_Mean, Future_Mean),
    names_to = "TimePerspective",
    values_to = "Mean_Difference"
  ) %>%
  mutate(TimePerspective = gsub("_Mean", "", TimePerspective))

p2 <- ggplot(
  plot_data_long,
  aes(
    x = reorder(Domain, Mean_Difference),
    y = Mean_Difference,
    fill = TimePerspective
  )
) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_fill_manual(values = c("Past" = "#E74C3C", "Future" = "#3498DB")) +
  labs(
    title = "Mean Differences by Time Perspective",
    subtitle = "Past vs Future difference scores",
    x = "Domain",
    y = "Mean Difference Score",
    fill = "Time Perspective"
  ) +
  theme_minimal() +
  theme(axis.text.y = element_text(size = 8))

print(p2)

cat("\nAnalysis complete! Check the plots and summary table above.\n")
cat("Key findings:\n")
cat(
  "- Domains with largest effect sizes:",
  paste(head(summary_df$Domain, 3), collapse = ", "),
  "\n"
)
cat(
  "- Number of significant differences:",
  sum(summary_df$Significant),
  "out of",
  nrow(summary_df),
  "\n"
)