# Mixed ANOVA Analysis for Past vs Future Differences
# EOHI Experiment Data Analysis
#
# Pipeline: read the wide CSV, reshape the NPastDiff_* / NFutDiff_* columns to
# long format, check assumptions, print descriptives, then run mixed ANOVAs
# (overall, per domain type, per item) with ez::ezANOVA.

# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests

# Read the data
data <- read.csv("eohi1/exp1.csv")

# Display basic information about the dataset
cat("Dataset dimensions:", dim(data), "\n")
cat("Number of participants:", length(unique(data$pID)), "\n")

# Check experimental conditions
cat("\nExperimental conditions:\n")
# Explicit print(): a bare top-level expression prints nothing under source().
print(table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO))

# STEP 1: PROPER DATA RESHAPING ----

# Define domains with their categories
domain_info <- data.frame(
  domain = c(
    "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
    "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious",
    "pers_complex",
    "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
    "life_ideal", "life_excellent", "life_satisfied", "life_important",
    "life_change"
  ),
  domain_type = c(
    rep("Preferences", 5),
    rep("Personality", 5),
    rep("Values", 5),
    rep("Life_Satisfaction", 5)
  ),
  stringsAsFactors = FALSE
)

# Display domain_info
cat("\nDomain Information:\n")
print(domain_info)
cat("\nDomain type summary:\n")
print(table(domain_info$domain_type))

#' Reshape all domains to long format, tagged with domain information
#'
#' For every row of `domain_info`, stacks the `NPastDiff_<domain>` and
#' `NFutDiff_<domain>` columns of `data` into a single `Difference` column.
#' Domains whose columns are absent are reported with a message and skipped.
#'
#' @param data Wide data frame containing the participant/design columns
#'   listed in `id_cols` below plus the per-domain difference columns.
#' @param domain_info Data frame with character columns `domain` and
#'   `domain_type`.
#' @return A long data frame with the id columns followed by
#'   `TimePerspective`, `Difference`, `Domain_Type` and `Domain_Item`;
#'   `pID`, `TimePerspective`, `Domain_Type` and `Domain_Item` are factors.
#'   Rows are ordered domain by domain, Past before Future.
reshape_all_domains <- function(data, domain_info) {
  id_cols <- c(
    "pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO",
    "COC_DO", "demo_sex", "demo_age_1", "AOT_total", "CRT_correct"
  )

  # One time perspective of one domain, in long layout.
  stack_perspective <- function(value_col, perspective, domain_name,
                                domain_type) {
    out <- data[, id_cols, drop = FALSE]
    out$TimePerspective <- perspective
    out$Difference <- data[[value_col]]
    out$Domain_Type <- domain_type # e.g., "Preferences"
    out$Domain_Item <- domain_name # e.g., "pref_read"
    out
  }

  # Build all pieces first and bind once; growing a data frame with rbind()
  # inside a loop copies the accumulated rows on every iteration.
  # seq_len() also handles an empty domain_info (1:0 would iterate twice).
  pieces <- lapply(seq_len(nrow(domain_info)), function(i) {
    domain_name <- domain_info$domain[i]
    domain_type <- domain_info$domain_type[i]
    past_col <- paste0("NPastDiff_", domain_name)
    fut_col <- paste0("NFutDiff_", domain_name)

    # Check if columns exist
    if (!all(c(past_col, fut_col) %in% colnames(data))) {
      cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
      return(NULL)
    }

    rbind(
      stack_perspective(past_col, "Past", domain_name, domain_type),
      stack_perspective(fut_col, "Future", domain_name, domain_type)
    )
  })

  if (all(vapply(pieces, is.null, logical(1)))) {
    stop(
      "None of the NPastDiff_*/NFutDiff_* columns named in domain_info ",
      "were found in data.",
      call. = FALSE
    )
  }

  # bind_rows() silently ignores the NULL entries of skipped domains.
  all_long_data <- bind_rows(pieces)

  # Convert to factors
  all_long_data %>%
    mutate(
      TimePerspective = as.factor(TimePerspective),
      Domain_Type = as.factor(Domain_Type),
      Domain_Item = as.factor(Domain_Item),
      pID = as.factor(pID)
    )
}

# Reshape all data to long format
cat("\nReshaping data to long format...\n")
long_data <- reshape_all_domains(data, domain_info)

cat("Long format data dimensions:", dim(long_data), "\n")
cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n")
cat(
  "Domain types:",
  paste(unique(long_data$Domain_Type), collapse = ", "),
  "\n"
)
cat("Domain type counts:\n")
print(table(long_data$Domain_Type))

# Display structure and sample of long_data
cat("\nLong data structure:\n")
str(long_data)
cat("\nFirst 10 rows of long_data:\n")
print(head(long_data, 10))
cat("\nColumn names:\n")
print(colnames(long_data))

# Show factor levels for domain variables
cat("\nDomain_Type factor levels:\n")
print(levels(long_data$Domain_Type))
cat("\nDomain_Item factor levels:\n")
print(levels(long_data$Domain_Item))
cat("\nTimePerspective factor levels:\n")
print(levels(long_data$TimePerspective))

# Show a sample with actual names instead of numbers
cat("\nSample data with actual names (first 6 rows):\n")
sample_data <- long_data[
  1:6,
  c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")
]
print(sample_data)

# STEP 2: ASSUMPTION CHECKING ----
cat("\n", strrep("=", 80), "\n")
cat("STEP 2: CHECKING ASSUMPTIONS\n")
cat(strrep("=", 80), "\n")

print(head(long_data))

# 2.1 Check for missing values
missing_summary <- long_data %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(Difference)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = "drop"
  )

cat("\nMissing values by domain and time perspective:\n")
print(missing_summary)

# Remove missing values
long_data_clean <- long_data[!is.na(long_data$Difference), ]
cat("\nData after removing missing values:", dim(long_data_clean), "\n")

# 2.2 Outlier detection
cat("\nChecking for outliers...\n")
outlier_summary <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    mean = mean(Difference),
    sd = sd(Difference),
    q1 = quantile(Difference, 0.25),
    q3 = quantile(Difference, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(Difference < lower_bound | Difference > upper_bound),
    .groups = "drop"
  )

cat("Outlier summary (IQR method):\n")
print(outlier_summary)

# 2.3 Normality tests

# p-value of a normality test, or NA when the test cannot be computed.
# Returns NA_real_ when the sample size is outside [min_n, max_n] or when the
# test itself errors (e.g. shapiro.test() on all-identical values), so one
# degenerate cell does not abort the whole summary table.
safe_p_value <- function(test_fn, x, min_n, max_n = Inf) {
  n_obs <- length(x)
  if (n_obs < min_n || n_obs > max_n) {
    return(NA_real_)
  }
  tryCatch(test_fn(x)$p.value, error = function(e) NA_real_)
}

cat("\nTesting normality...\n")
normality_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    # shapiro.test() accepts 3 to 5000 observations.
    shapiro_p = safe_p_value(shapiro.test, Difference, 3, 5000),
    # nortest::ad.test() requires a sample size greater than 7.
    anderson_p = safe_p_value(ad.test, Difference, 8),
    .groups = "drop"
  ) %>%
  mutate(
    shapiro_normal = shapiro_p > 0.05,
    anderson_normal = anderson_p > 0.05,
    overall_normal = case_when(
      !is.na(shapiro_p) & !is.na(anderson_p) ~
        shapiro_normal & anderson_normal,
      !is.na(shapiro_p) ~ shapiro_normal,
      !is.na(anderson_p) ~ anderson_normal,
      TRUE ~ NA
    )
  )

cat("Normality test results:\n")
print(normality_results)

# 2.4 Homogeneity of variance (Levene's test)
cat("\nTesting homogeneity of variance...\n")
homogeneity_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item) %>%
  summarise(
    levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1],
    homogeneous = levene_p > 0.05,
    .groups = "drop"
  )

cat("Homogeneity of variance results:\n")
print(homogeneity_results)

# STEP 3: DESCRIPTIVE STATISTICS ----
cat("\n", strrep("=", 80), "\n")
cat("STEP 3: DESCRIPTIVE STATISTICS\n")
cat(strrep("=", 80), "\n")

desc_stats <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    mean = mean(Difference),
    sd = sd(Difference),
    median = median(Difference),
    q1 = quantile(Difference, 0.25),
    q3 = quantile(Difference, 0.75),
    min = min(Difference),
    max = max(Difference),
    .groups = "drop"
  )

cat("Descriptive statistics:\n")
print(desc_stats)

# STEP 4: MIXED ANOVA ANALYSES ----
cat("\n", strrep("=", 80), "\n")
cat("STEP 4: MIXED ANOVA ANALYSES\n")
cat(strrep("=", 80), "\n")

# Between-subject variables must be factors; numeric condition codes would
# otherwise be treated as numeric predictors by ezANOVA.
anova_data <- long_data_clean %>%
  mutate(
    GROUP = as.factor(GROUP),
    TASK_DO = as.factor(TASK_DO)
  )

#' Prepare long data for ezANOVA
#'
#' Averages `Difference` to one value per participant and within-subject cell
#' (what ezANOVA would otherwise do implicitly with a warning), drops
#' participants who do not have every within-subject cell (ezANOVA stops on
#' missing cells), drops unused factor levels, and returns a plain data frame
#' (ezANOVA indexes with `data[, name]`, which needs a base data frame).
#'
#' @param df Long data with `pID`, `GROUP`, `TASK_DO`, `Difference` and the
#'   columns named in `within_cols`.
#' @param within_cols Character vector of within-subject factor names.
#' @return A base data frame of complete-case cell means.
prepare_anova_data <- function(df, within_cols) {
  cell_means <- df %>%
    group_by(across(all_of(c("pID", "GROUP", "TASK_DO", within_cols)))) %>%
    summarise(Difference = mean(Difference), .groups = "drop") %>%
    droplevels()

  cells_per_participant <- prod(vapply(
    within_cols,
    function(col) n_distinct(cell_means[[col]]),
    numeric(1)
  ))
  cell_counts <- table(cell_means$pID)
  complete_ids <- names(cell_counts)[cell_counts == cells_per_participant]

  n_dropped <- length(cell_counts) - length(complete_ids)
  if (n_dropped > 0) {
    cat(
      "Note:", n_dropped,
      "participant(s) with incomplete within-subject cells excluded\n"
    )
  }

  cell_means %>%
    filter(pID %in% complete_ids) %>%
    droplevels() %>%
    as.data.frame()
}

# 4.1 Overall analysis across all domains
cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n")
cat(strrep("-", 50), "\n")

overall_anova <- tryCatch(
  {
    overall_data <- prepare_anova_data(
      anova_data,
      c("TimePerspective", "Domain_Type")
    )
    fit <- ezANOVA(
      data = overall_data,
      dv = Difference,
      wid = pID,
      within = c(TimePerspective, Domain_Type),
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE,
      return_aov = TRUE
    )
    cat("Overall ANOVA Results:\n")
    print(fit)

    # Check sphericity
    if (!is.null(fit$`Mauchly's Test for Sphericity`)) {
      cat("\nSphericity test results:\n")
      print(fit$`Mauchly's Test for Sphericity`)
    }
    fit
  },
  error = function(e) {
    cat("Error in overall ANOVA:", conditionMessage(e), "\n")
    NULL
  }
)

# 4.2 Domain-specific analyses
cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
cat(strrep("-", 50), "\n")

domain_results <- list()

for (domain_type in as.character(unique(anova_data$Domain_Type))) {
  cat("\nAnalyzing domain type:", domain_type, "\n")

  domain_rows <- anova_data[anova_data$Domain_Type == domain_type, ]

  # The handlers RETURN their result; assigning to domain_results from inside
  # a handler function would only modify a copy local to that function.
  domain_fit <- tryCatch(
    {
      domain_data <- prepare_anova_data(
        domain_rows,
        c("TimePerspective", "Domain_Item")
      )
      domain_anova <- ezANOVA(
        data = domain_data,
        dv = Difference,
        wid = pID,
        within = c(TimePerspective, Domain_Item),
        between = c(GROUP, TASK_DO),
        type = 3,
        detailed = TRUE
      )
      cat("ANOVA results for", domain_type, ":\n")
      print(domain_anova)
      domain_anova
    },
    error = function(e) {
      cat("Error in ANOVA for", domain_type, ":", conditionMessage(e), "\n")

      # Fallback to simpler analysis (items averaged within participant)
      cat("Attempting simpler repeated measures ANOVA...\n")
      tryCatch(
        {
          simple_data <- prepare_anova_data(domain_rows, "TimePerspective")
          simple_anova <- ezANOVA(
            data = simple_data,
            dv = Difference,
            wid = pID,
            within = TimePerspective,
            between = c(GROUP, TASK_DO),
            type = 3,
            detailed = TRUE
          )
          print(simple_anova)
          simple_anova
        },
        error = function(e2) {
          cat("Simple ANOVA also failed:", conditionMessage(e2), "\n")
          NULL
        }
      )
    }
  )

  if (!is.null(domain_fit)) {
    domain_results[[domain_type]] <- domain_fit
  }
}

# 4.3 Individual domain item analyses
cat("\n4.3 Individual Domain Item Analyses:\n")
cat(strrep("-", 50), "\n")

item_results <- list()

for (domain_item in as.character(unique(anova_data$Domain_Item))) {
  cat("\nAnalyzing individual item:", domain_item, "\n")

  item_rows <- anova_data[anova_data$Domain_Item == domain_item, ]

  item_fit <- tryCatch(
    {
      item_data <- prepare_anova_data(item_rows, "TimePerspective")
      item_anova <- ezANOVA(
        data = item_data,
        dv = Difference,
        wid = pID,
        within = TimePerspective,
        between = c(GROUP, TASK_DO),
        type = 3,
        detailed = TRUE
      )
      cat("ANOVA results for", domain_item, ":\n")
      print(item_anova)
      item_anova
    },
    error = function(e) {
      cat("Error in ANOVA for", domain_item, ":", conditionMessage(e), "\n")

      # Fallback to paired t-test. Build a participant x perspective table so
      # Past and Future values are aligned by pID, and keep only participants
      # who have both; independently filtered vectors are not valid pairs.
      paired <- tapply(
        item_rows$Difference,
        list(item_rows$pID, item_rows$TimePerspective),
        mean
      )
      paired <- paired[complete.cases(paired), , drop = FALSE]

      if (nrow(paired) > 1) {
        tryCatch(
          {
            t_test <- t.test(
              paired[, "Past"],
              paired[, "Future"],
              paired = TRUE
            )
            cat("Fallback paired t-test for", domain_item, ":\n")
            cat(
              "t =", round(t_test$statistic, 3),
              ", df =", t_test$parameter,
              ", p =", round(t_test$p.value, 5), "\n"
            )
            t_test
          },
          error = function(e2) {
            cat("Fallback paired t-test also failed:", conditionMessage(e2), "\n")
            NULL
          }
        )
      } else {
        NULL
      }
    }
  )

  if (!is.null(item_fit)) {
    item_results[[domain_item]] <- item_fit
  }
}

cat("\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE!\n")
cat(strrep("=", 80), "\n")
cat("Summary:\n")
cat(
  "- Total domains analyzed:",
  length(unique(long_data_clean$Domain_Item)),
  "\n"
)
cat(
  "- Domain types analyzed:",
  length(unique(long_data_clean$Domain_Type)),
  "\n"
)
cat("- Individual item analyses completed:", length(item_results), "\n")