# Mixed ANOVA Analysis for Past vs Future Differences
# EOHI Experiment Data Analysis

# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests

# Read the data
data <- read.csv("eohi1/exp1.csv")

# Display basic information about the dataset
cat("Dataset dimensions:", dim(data), "\n")
cat("Number of participants:", length(unique(data$pID)), "\n")

# Check experimental conditions
cat("\nExperimental conditions:\n")
# Explicit print(): a bare top-level expression is not shown under source().
print(table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO))

# STEP 1: PROPER DATA RESHAPING ----

# Define domains with their categories
domain_info <- data.frame(
  domain = c(
    "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
    "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious",
    "pers_complex",
    "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
    "life_ideal", "life_excellent", "life_satisfied", "life_important",
    "life_change"
  ),
  domain_type = c(
    rep("Preferences", 5), rep("Personality", 5),
    rep("Values", 5), rep("Life_Satisfaction", 5)
  ),
  stringsAsFactors = FALSE
)

# Display domain_info
cat("\nDomain Information:\n")
print(domain_info)
cat("\nDomain type summary:\n")
print(table(domain_info$domain_type))

# Reshape ALL domains at once from wide to long format.
#
# For every row of `domain_info`, the columns `NPastDiff_<domain>` and
# `NFutDiff_<domain>` of `data` are stacked into a single `Difference`
# column, labelled by `TimePerspective` ("Past" / "Future"), `Domain_Type`
# and `Domain_Item`. Domains whose past or future column is absent are
# reported on the console and skipped.
#
# Args:
#   data:        wide data frame holding the participant-level columns
#                listed in `id_cols` below plus the NPastDiff_*/NFutDiff_*
#                columns.
#   domain_info: data frame with character columns `domain` and
#                `domain_type`, one row per domain.
#
# Returns:
#   A long data frame with the `id_cols` columns followed by
#   TimePerspective, Difference, Domain_Type and Domain_Item. pID,
#   TimePerspective, Domain_Type and Domain_Item are factors; the levels of
#   the two domain factors follow the row order of `domain_info`.
reshape_all_domains <- function(data, domain_info) {
  id_cols <- c(
    "pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO",
    "COC_DO", "demo_sex", "demo_age_1", "AOT_total", "CRT_correct"
  )

  # Fail early with a readable message instead of "undefined columns selected".
  missing_ids <- setdiff(id_cols, colnames(data))
  if (length(missing_ids) > 0) {
    stop(
      "Required columns missing from data: ",
      paste(missing_ids, collapse = ", "),
      call. = FALSE
    )
  }

  # Build the long rows for one value column (one domain, one time point).
  stack_one <- function(value_col, time_label, domain_name, domain_type) {
    out <- data[, id_cols, drop = FALSE]
    out$TimePerspective <- time_label
    out$Difference <- data[[value_col]]
    out$Domain_Type <- domain_type
    out$Domain_Item <- domain_name
    out
  }

  # Collect per-domain pieces in a preallocated list and bind once at the
  # end; rbind-ing onto a growing data frame copies it on every iteration.
  pieces <- vector("list", nrow(domain_info))

  # seq_len() is safe for a zero-row domain_info (1:0 would iterate twice).
  for (i in seq_len(nrow(domain_info))) {
    domain_name <- domain_info$domain[i]
    domain_type <- domain_info$domain_type[i]

    past_col <- paste0("NPastDiff_", domain_name)
    fut_col <- paste0("NFutDiff_", domain_name)

    # Check if columns exist
    if (!all(c(past_col, fut_col) %in% colnames(data))) {
      cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
      next
    }

    # Past rows first, then future rows, for this domain.
    pieces[[i]] <- rbind(
      stack_one(past_col, "Past", domain_name, domain_type),
      stack_one(fut_col, "Future", domain_name, domain_type)
    )
  }

  pieces <- Filter(Negate(is.null), pieces)
  all_long_data <- if (length(pieces) > 0) {
    do.call(rbind, pieces)
  } else {
    data.frame()
  }

  # Convert to factors with proper levels. Levels are taken from domain_info
  # (in row order) so they cannot drift from the domains that were actually
  # reshaped; a hard-coded level list would turn unknown names into NA.
  all_long_data$TimePerspective <- factor(
    all_long_data$TimePerspective,
    levels = c("Past", "Future")
  )
  all_long_data$Domain_Type <- factor(
    all_long_data$Domain_Type,
    levels = unique(domain_info$domain_type)
  )
  all_long_data$Domain_Item <- factor(
    all_long_data$Domain_Item,
    levels = unique(domain_info$domain)
  )
  all_long_data$pID <- as.factor(all_long_data$pID)

  all_long_data
}

# Reshape all data to long format
cat("\nReshaping data to long format...\n")
long_data <- reshape_all_domains(data, domain_info)

cat("Long format data dimensions:", dim(long_data), "\n")
cat("Unique domains:", length(unique(long_data$Domain_Item)), "\n")
cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n")
cat("Domain type counts:\n")
print(table(long_data$Domain_Type))

# Display structure and sample of long_data
cat("\nLong data structure:\n")
str(long_data)
cat("\nFirst 10 rows of long_data:\n")
print(head(long_data, 10))
cat("\nColumn names:\n")
print(colnames(long_data))

# Show factor levels for domain variables
cat("\nDomain_Type factor levels:\n")
print(levels(long_data$Domain_Type))
cat("\nDomain_Item factor levels:\n")
print(levels(long_data$Domain_Item))
cat("\nTimePerspective factor levels:\n")
print(levels(long_data$TimePerspective))

# Columns shown in the illustrative samples below.
display_cols <- c(
  "pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference"
)

# Show a sample with actual names instead of numbers
cat("\nSample data with actual names (first 6 rows):\n")
# head() never pads with NA rows when fewer than 6 rows exist.
sample_data <- head(long_data[, display_cols], 6)
print(sample_data)

# Show a better example - one participant across multiple domains
cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n")
participant_1_data <- long_data[long_data$pID == 1, display_cols]
# Limit the printout to the 10 rows the caption announces.
print(head(participant_1_data, 10))

# Show structure explanation
n_items <- length(unique(long_data$Domain_Item))
n_participants <- length(unique(long_data$pID))
cat("\nLong format explanation:\n")
cat("- Each participant appears", n_items * 2, "times total\n")
cat("- (", n_items, "domains × 2 time perspectives)\n")
cat("- Total rows per participant:", n_items * 2, "\n")
cat("- Total participants:", n_participants, "\n")
cat("- Expected total rows:", n_participants * n_items * 2, "\n")
cat("- Actual total rows:", nrow(long_data), "\n")

# STEP 2: ASSUMPTION CHECKING ----
cat("\n", paste(rep("=", 80), collapse = ""), "\n")
cat("STEP 2: CHECKING ASSUMPTIONS\n")
cat(paste(rep("=", 80), collapse = ""), "\n")

print(head(long_data))

# 2.1 Check for missing values
missing_summary <- long_data %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(Difference)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = "drop"
  )

cat("\nMissing values by domain and time perspective:\n")
print(missing_summary)

# Remove missing values
# Keep only rows with an observed Difference score. Rows are dropped per
# observation, so a participant may keep Past but lose Future (or vice
# versa) for a given item.
long_data_clean <- long_data[!is.na(long_data$Difference), ]
# Treat the between-subject design variables as categorical. ezANOVA handles
# numeric predictors as continuous, which is not what a group code means.
# NOTE(review): assumes GROUP and TASK_DO are condition labels - confirm.
long_data_clean$GROUP <- as.factor(long_data_clean$GROUP)
long_data_clean$TASK_DO <- as.factor(long_data_clean$TASK_DO)
cat("\nData after removing missing values:", dim(long_data_clean), "\n")

# 2.2 Outlier detection
cat("\nChecking for outliers...\n")
outlier_summary <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    mean = mean(Difference),
    sd = sd(Difference),
    q1 = quantile(Difference, 0.25),
    q3 = quantile(Difference, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(Difference < lower_bound | Difference > upper_bound),
    .groups = "drop"
  )

cat("Outlier summary (IQR method):\n")
print(outlier_summary)

# 2.3 Normality tests

# Run a normality test and return its p-value, or NA when the test cannot be
# applied.
#
# Args:
#   x:        numeric vector of scores (no NAs expected here).
#   test_fun: function returning an object with a `p.value` element,
#             e.g. shapiro.test or nortest::ad.test.
#   min_n:    smallest sample size the test accepts.
#   max_n:    largest sample size the test accepts.
#
# Returns:
#   A single numeric p-value, or NA_real_ if the sample size is out of range
#   or the test itself fails (e.g. all values identical).
safe_normality_p <- function(x, test_fun, min_n, max_n = Inf) {
  n_obs <- length(x)
  if (n_obs < min_n || n_obs > max_n) {
    return(NA_real_)
  }
  tryCatch(test_fun(x)$p.value, error = function(e) NA_real_)
}

cat("\nTesting normality...\n")
normality_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    # shapiro.test accepts 3 to 5000 observations.
    shapiro_p = safe_normality_p(Difference, shapiro.test, 3, 5000),
    # ad.test needs MORE than 7 observations, so the minimum is 8.
    anderson_p = safe_normality_p(Difference, ad.test, 8),
    .groups = "drop"
  ) %>%
  mutate(
    shapiro_normal = shapiro_p > 0.05,
    anderson_normal = anderson_p > 0.05,
    overall_normal = case_when(
      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
      !is.na(shapiro_p) ~ shapiro_normal,
      !is.na(anderson_p) ~ anderson_normal,
      TRUE ~ NA
    )
  )

cat("Normality test results:\n")
print(normality_results)

# 2.4 Homogeneity of variance (Levene's test)
# NOTE(review): this compares Past vs Future variances as if they were
# independent groups; the pairing within participants is ignored.
cat("\nTesting homogeneity of variance...\n")
homogeneity_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item) %>%
  summarise(
    levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1],
    homogeneous = levene_p > 0.05,
    .groups = "drop"
  )

cat("Homogeneity of variance results:\n")
print(homogeneity_results)

# STEP 3: DESCRIPTIVE STATISTICS ----
cat("\n", strrep("=", 80), "\n")
cat("STEP 3: DESCRIPTIVE STATISTICS\n")
cat(strrep("=", 80), "\n")

desc_stats <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    mean = mean(Difference),
    sd = sd(Difference),
    median = median(Difference),
    q1 = quantile(Difference, 0.25),
    q3 = quantile(Difference, 0.75),
    min = min(Difference),
    max = max(Difference),
    .groups = "drop"
  )

cat("Descriptive statistics:\n")
print(desc_stats)

# STEP 4: MIXED ANOVA ANALYSES ----
cat("\n", strrep("=", 80), "\n")
cat("STEP 4: MIXED ANOVA ANALYSES\n")
cat(strrep("=", 80), "\n")

# 4.1 Overall analysis across all domains
cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n")
cat(strrep("-", 50), "\n")

# Each participant has several items per TimePerspective x Domain_Type cell.
# Average them explicitly so the ANOVA sees one score per cell instead of
# relying on ezANOVA's implicit collapsing. as.data.frame() because ezANOVA
# is applied to a plain data frame everywhere else in this script.
overall_cells <- long_data_clean %>%
  group_by(pID, GROUP, TASK_DO, TimePerspective, Domain_Type) %>%
  summarise(Difference = mean(Difference), .groups = "drop") %>%
  as.data.frame()

overall_anova <- tryCatch(
  ezANOVA(
    data = overall_cells,
    dv = Difference,
    wid = pID,
    within = c(TimePerspective, Domain_Type),
    between = c(GROUP, TASK_DO),
    type = 3,
    detailed = TRUE,
    return_aov = TRUE
  ),
  error = function(e) {
    cat("Error in overall ANOVA:", conditionMessage(e), "\n")
    NULL
  }
)

if (!is.null(overall_anova)) {
  cat("Overall ANOVA Results:\n")
  print(overall_anova)

  # Check sphericity
  if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) {
    cat("\nSphericity test results:\n")
    print(overall_anova$`Mauchly's Test for Sphericity`)
  }
}

# 4.2 Domain-specific analyses
cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
cat(strrep("-", 50), "\n")

domain_results <- list()

for (domain_type in unique(as.character(long_data_clean$Domain_Type))) {
  cat("\nAnalyzing domain type:", domain_type, "\n")

  # droplevels(): remove items and participants that do not occur in this
  # subset so no empty factor levels are handed to ezANOVA.
  domain_data <- droplevels(
    long_data_clean[long_data_clean$Domain_Type == domain_type, ]
  )

  # tryCatch returns the fit (or NULL); the result is stored OUTSIDE the
  # handler because an assignment inside `function(e)` only changes a local
  # copy of `domain_results` and is lost when the handler returns.
  domain_anova <- tryCatch(
    ezANOVA(
      data = domain_data,
      dv = Difference,
      wid = pID,
      within = c(TimePerspective, Domain_Item),
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE
    ),
    error = function(e) {
      cat("Error in ANOVA for", domain_type, ":", conditionMessage(e), "\n")
      NULL
    }
  )

  if (!is.null(domain_anova)) {
    cat("ANOVA results for", domain_type, ":\n")
    print(domain_anova)
  } else {
    # Fallback to simpler analysis
    cat("Attempting simpler repeated measures ANOVA...\n")
    domain_anova <- tryCatch(
      ezANOVA(
        data = domain_data,
        dv = Difference,
        wid = pID,
        within = TimePerspective,
        between = c(GROUP, TASK_DO),
        type = 3,
        detailed = TRUE
      ),
      error = function(e2) {
        cat("Simple ANOVA also failed:", conditionMessage(e2), "\n")
        NULL
      }
    )
    if (!is.null(domain_anova)) {
      print(domain_anova)
    }
  }

  if (!is.null(domain_anova)) {
    domain_results[[domain_type]] <- domain_anova
  }
}

# 4.3 Individual domain item analyses
cat("\n4.3 Individual Domain Item Analyses:\n")
cat(strrep("-", 50), "\n")

# Paired Past-vs-Future t-test for one item, pairing scores by participant.
#
# Args:
#   item_data: long-format rows for a single Domain_Item with columns pID,
#              TimePerspective and Difference.
#
# Returns:
#   The `htest` object from t.test(), or NULL when fewer than two complete
#   pairs exist or the test fails.
paired_fallback_t <- function(item_data) {
  keep <- c("pID", "Difference")
  past_rows <- item_data[item_data$TimePerspective == "Past", keep]
  fut_rows <- item_data[item_data$TimePerspective == "Future", keep]
  # Join on pID: after NA removal the Past and Future rows are no longer
  # guaranteed to line up by position or even to have the same length.
  pairs <- merge(past_rows, fut_rows, by = "pID", suffixes = c("_past", "_fut"))
  if (nrow(pairs) < 2) {
    return(NULL)
  }
  tryCatch(
    t.test(pairs$Difference_past, pairs$Difference_fut, paired = TRUE),
    error = function(e) {
      cat("Fallback paired t-test failed:", conditionMessage(e), "\n")
      NULL
    }
  )
}

item_results <- list()

for (domain_item in unique(as.character(long_data_clean$Domain_Item))) {
  cat("\nAnalyzing individual item:", domain_item, "\n")

  item_data <- droplevels(
    long_data_clean[long_data_clean$Domain_Item == domain_item, ]
  )

  item_result <- tryCatch(
    ezANOVA(
      data = item_data,
      dv = Difference,
      wid = pID,
      within = TimePerspective,
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE
    ),
    error = function(e) {
      cat("Error in ANOVA for", domain_item, ":", conditionMessage(e), "\n")
      NULL
    }
  )

  if (!is.null(item_result)) {
    cat("ANOVA results for", domain_item, ":\n")
    print(item_result)
  } else {
    # Fallback to paired t-test
    item_result <- paired_fallback_t(item_data)
    if (!is.null(item_result)) {
      cat("Fallback paired t-test for", domain_item, ":\n")
      cat(
        "t =", round(item_result$statistic, 3),
        ", df =", item_result$parameter,
        ", p =", round(item_result$p.value, 5), "\n"
      )
    }
  }

  # Stored at loop level so fallback results are counted in the summary.
  if (!is.null(item_result)) {
    item_results[[domain_item]] <- item_result
  }
}

cat("\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE!\n")
cat(strrep("=", 80), "\n")
cat("Summary:\n")
cat("- Total domains analyzed:", length(unique(long_data_clean$Domain_Item)), "\n")
cat("- Domain types analyzed:", length(unique(long_data_clean$Domain_Type)), "\n")
cat("- Individual item analyses completed:", length(item_results), "\n")