# Mixed ANOVA Analysis for Past vs Future Differences
# EOHI Experiment Data Analysis

# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(nortest)      # For normality tests

# Read the raw experiment export (path is relative to the working directory)
data <- read.csv("eohi1/exp1.csv")

# Display basic information about the dataset
cat("Dataset dimensions:", dim(data), "\n")
cat("Number of participants:", length(unique(data$pID)), "\n")

# Check experimental conditions: cross-tabulate GROUP x TASK_DO x TEMPORAL_DO.
# The table is printed explicitly because a bare top-level expression is not
# auto-printed when the script is run through source() with default settings,
# which would silently drop the counts.
cat("\nExperimental conditions:\n")
print(table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO))

# STEP 1: PROPER DATA RESHAPING
# Lookup table pairing each item suffix with the category it belongs to.
# Items are listed category by category, five per category, so the category
# labels are generated with a single rep(..., each = 5).
domain_info <- data.frame(
  domain = c(
    "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
    "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious",
    "pers_complex",
    "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
    "life_ideal", "life_excellent", "life_satisfied", "life_important",
    "life_change"
  ),
  domain_type = rep(
    c("Preferences", "Personality", "Values", "Life_Satisfaction"),
    each = 5
  ),
  stringsAsFactors = FALSE
)

# Show the lookup table and how many items fall under each category
cat("\nDomain Information:\n")
print(domain_info)
cat("\nDomain type summary:\n")
print(table(domain_info$domain_type))

# Reshape the wide past/future difference columns into one long data frame.
#
# For every row of `domain_info`, the columns "NPastDiff_<domain>" and
# "NFutDiff_<domain>" of `data` are stacked into a single `Difference` column,
# labelled with `TimePerspective` ("Past"/"Future"), `Domain_Type` and
# `Domain_Item`. Domains whose columns are absent from `data` are reported on
# the console and skipped.
#
# Args:
#   data:        wide data frame. Must contain the identifier/covariate
#                columns listed in `id_cols` below, plus the NPastDiff_* and
#                NFutDiff_* columns for the domains to be reshaped.
#   domain_info: data frame with columns `domain` (item suffix) and
#                `domain_type` (category label).
#
# Returns:
#   A long data frame with the `id_cols`, then TimePerspective, Difference,
#   Domain_Type and Domain_Item. pID, TimePerspective, Domain_Type and
#   Domain_Item are factors; the Domain_* levels follow the order in which
#   they appear in `domain_info` (so a custom `domain_info` no longer produces
#   NA factor values).
reshape_all_domains <- function(data, domain_info) {
  id_cols <- c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO",
               "ITEM_DO", "COC_DO", "demo_sex", "demo_age_1", "AOT_total",
               "CRT_correct")

  # Long-format slice for one domain and one time perspective
  stack_one <- function(value_col, time_label, domain_name, domain_type) {
    piece <- data[, id_cols, drop = FALSE]
    piece$TimePerspective <- time_label
    piece$Difference <- data[[value_col]]
    piece$Domain_Type <- domain_type
    piece$Domain_Item <- domain_name
    piece
  }

  # Collect the per-domain pieces in a preallocated list and bind them once,
  # instead of growing a data frame with rbind() on every iteration.
  pieces <- vector("list", nrow(domain_info))

  # seq_len() keeps the loop from running at all when domain_info has no rows
  for (i in seq_len(nrow(domain_info))) {
    domain_name <- domain_info$domain[i]
    domain_type <- domain_info$domain_type[i]

    past_col <- paste0("NPastDiff_", domain_name)
    fut_col <- paste0("NFutDiff_", domain_name)

    # Skip domains that are not present in the wide data
    if (!all(c(past_col, fut_col) %in% colnames(data))) {
      cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
      next
    }

    # Past rows first, then Future rows, for this domain
    pieces[[i]] <- rbind(
      stack_one(past_col, "Past", domain_name, domain_type),
      stack_one(fut_col, "Future", domain_name, domain_type)
    )
  }

  # Skipped domains leave NULL slots behind; drop them before binding
  pieces <- Filter(Negate(is.null), pieces)
  all_long_data <- if (length(pieces) > 0) {
    do.call(rbind, pieces)
  } else {
    data.frame()
  }

  # Convert to factors; Domain_* levels are taken from domain_info rather than
  # hard-coded so the function works for whatever lookup table it is given.
  all_long_data$TimePerspective <- factor(
    all_long_data$TimePerspective,
    levels = c("Past", "Future")
  )
  all_long_data$Domain_Type <- factor(
    all_long_data$Domain_Type,
    levels = unique(as.character(domain_info$domain_type))
  )
  all_long_data$Domain_Item <- factor(
    all_long_data$Domain_Item,
    levels = unique(as.character(domain_info$domain))
  )
  all_long_data$pID <- as.factor(all_long_data$pID)

  all_long_data
}

# Reshape all data to long format
cat("\nReshaping data to long format...\n")
long_data <- reshape_all_domains(data, domain_info)

# Counts reused by several of the summaries below
n_items <- length(unique(long_data$Domain_Item))
n_participants <- length(unique(long_data$pID))

cat("Long format data dimensions:", dim(long_data), "\n")
cat("Unique domains:", n_items, "\n")
cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n")
cat("Domain type counts:\n")
print(table(long_data$Domain_Type))

# Display structure and sample of long_data
cat("\nLong data structure:\n")
str(long_data)

cat("\nFirst 10 rows of long_data:\n")
print(head(long_data, 10))

cat("\nColumn names:\n")
print(colnames(long_data))

# Show factor levels for domain variables
cat("\nDomain_Type factor levels:\n")
print(levels(long_data$Domain_Type))

cat("\nDomain_Item factor levels:\n")
print(levels(long_data$Domain_Item))

cat("\nTimePerspective factor levels:\n")
print(levels(long_data$TimePerspective))

# Columns shown in the two previews below
preview_cols <- c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")

# Show a sample with actual names instead of numbers.
# head() is used instead of 1:6 indexing so short data sets do not get padded
# with all-NA rows.
cat("\nSample data with actual names (first 6 rows):\n")
sample_data <- head(long_data[, preview_cols], 6)
print(sample_data)

# Show a better example - one participant across multiple domains.
# pID is a factor after reshaping, so it is compared by its label. Only the
# first 10 rows are printed, matching the heading; the full subset is kept in
# participant_1_data.
cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n")
participant_1_data <- long_data[as.character(long_data$pID) == "1", preview_cols]
print(head(participant_1_data, 10))

# Show structure explanation
cat("\nLong format explanation:\n")
cat("- Each participant appears", n_items * 2, "times total\n")
cat("- (", n_items, "domains × 2 time perspectives)\n")
cat("- Total rows per participant:", n_items * 2, "\n")
cat("- Total participants:", n_participants, "\n")
cat("- Expected total rows:", n_participants * n_items * 2, "\n")
cat("- Actual total rows:", nrow(long_data), "\n")

# STEP 2: ASSUMPTION CHECKING
cat("\n", paste(rep("=", 80), collapse = ""), "\n")
cat("STEP 2: CHECKING ASSUMPTIONS\n")
cat(paste(rep("=", 80), collapse = ""), "\n")

# Quick look at the reshaped data. Printed explicitly so the preview also
# appears when the script is run through source(), which does not auto-print
# bare top-level expressions by default.
print(head(long_data))

# 2.1 Check for missing values
# Count and percentage of NA differences per domain item and time perspective
missing_summary <- long_data %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(Difference)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

cat("\nMissing values by domain and time perspective:\n")
print(missing_summary)

# Remove missing values (row-wise: only the rows whose Difference is NA).
# NOTE(review): because rows are dropped individually, a participant can be
# left with some Past/Future cells missing — confirm the repeated-measures
# analyses further down are meant to run on such unbalanced data.
long_data_clean <- long_data[!is.na(long_data$Difference), ]
cat("\nData after removing missing values:", dim(long_data_clean), "\n")

# 2.2 Outlier detection
# For each domain item and time perspective, compute the quartiles, derive
# fences at 1.5 * IQR beyond them, and count the values outside the fences.
cat("\nChecking for outliers...\n")
outlier_summary <- summarise(
  group_by(long_data_clean, Domain_Type, Domain_Item, TimePerspective),
  n = n(),
  mean = mean(Difference),
  sd = sd(Difference),
  q1 = quantile(Difference, 0.25),
  q3 = quantile(Difference, 0.75),
  iqr = q3 - q1,
  lower_bound = q1 - 1.5 * iqr,
  upper_bound = q3 + 1.5 * iqr,
  n_outliers = sum(Difference < lower_bound | Difference > upper_bound),
  .groups = "drop"
)

cat("Outlier summary (IQR method):\n")
print(outlier_summary)

# 2.3 Normality tests
# Shapiro-Wilk and Anderson-Darling p-values per domain item and time
# perspective. Each test is only run when the group size is within the range
# the test accepts; otherwise its p-value is NA.
cat("\nTesting normality...\n")
normality_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    # shapiro.test() accepts between 3 and 5000 observations
    shapiro_p = if (n >= 3 && n <= 5000) {
      shapiro.test(Difference)$p.value
    } else {
      NA_real_
    },
    # nortest::ad.test() stops with "sample size must be greater than 7",
    # so the smallest usable group has 8 observations (not 7)
    anderson_p = if (n >= 8) {
      ad.test(Difference)$p.value
    } else {
      NA_real_
    },
    .groups = 'drop'
  ) %>%
  mutate(
    shapiro_normal = shapiro_p > 0.05,
    anderson_normal = anderson_p > 0.05,
    # Normal overall only if every test that could be run agrees
    overall_normal = case_when(
      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
      !is.na(shapiro_p) ~ shapiro_normal,
      !is.na(anderson_p) ~ anderson_normal,
      TRUE ~ NA
    )
  )

cat("Normality test results:\n")
print(normality_results)

# 2.4 Homogeneity of variance (Levene's test)
# One Levene test per domain item, comparing the spread of Difference between
# the Past and Future rows; the first p-value of the test table is kept.
# NOTE(review): the grouping factor here is TimePerspective, which the ANOVAs
# below treat as a within-subject factor — confirm this is the intended
# comparison rather than the between-subject factors.
cat("\nTesting homogeneity of variance...\n")
homogeneity_results <- summarise(
  group_by(long_data_clean, Domain_Type, Domain_Item),
  levene_p = leveneTest(Difference ~ TimePerspective)[["Pr(>F)"]][1],
  homogeneous = levene_p > 0.05,
  .groups = "drop"
)

cat("Homogeneity of variance results:\n")
print(homogeneity_results)

# STEP 3: DESCRIPTIVE STATISTICS
cat("\n", strrep("=", 80), "\n")
cat("STEP 3: DESCRIPTIVE STATISTICS\n")
cat(strrep("=", 80), "\n")

# Location and spread of Difference for every domain item x time perspective
desc_stats <- summarise(
  group_by(long_data_clean, Domain_Type, Domain_Item, TimePerspective),
  n = n(),
  mean = mean(Difference),
  sd = sd(Difference),
  median = median(Difference),
  q1 = quantile(Difference, 0.25),
  q3 = quantile(Difference, 0.75),
  min = min(Difference),
  max = max(Difference),
  .groups = "drop"
)

cat("Descriptive statistics:\n")
print(desc_stats)

# STEP 4: MIXED ANOVA ANALYSES
cat("\n", strrep("=", 80), "\n")
cat("STEP 4: MIXED ANOVA ANALYSES\n")
cat(strrep("=", 80), "\n")

# 4.1 Overall analysis across all domains
# Within-subject factors: TimePerspective and Domain_Type.
# Between-subject factors: GROUP and TASK_DO.
# Any error raised by the fit is reported on the console instead of stopping
# the script.
cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n")
cat(strrep("-", 50), "\n")

tryCatch(
  expr = {
    overall_anova <- ezANOVA(
      data = long_data_clean,
      dv = Difference,
      wid = pID,
      within = c(TimePerspective, Domain_Type),
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE,
      return_aov = TRUE
    )

    cat("Overall ANOVA Results:\n")
    print(overall_anova)

    # Report the sphericity test only when the result contains one
    if (!is.null(overall_anova[["Mauchly's Test for Sphericity"]])) {
      cat("\nSphericity test results:\n")
      print(overall_anova[["Mauchly's Test for Sphericity"]])
    }
  },
  error = function(err) {
    cat("Error in overall ANOVA:", err$message, "\n")
  }
)

# 4.2 Domain-specific analyses
# One mixed ANOVA per domain type (within: TimePerspective x Domain_Item,
# between: GROUP x TASK_DO). If that fit fails, a simpler model with
# TimePerspective as the only within factor is attempted. Whichever fit
# succeeds is stored in domain_results under the domain type's name.
cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
cat(paste(rep("-", 50), collapse = ""), "\n")

domain_results <- list()

for (domain_type in as.character(unique(long_data_clean$Domain_Type))) {
  cat("\nAnalyzing domain type:", domain_type, "\n")

  domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ]

  # tryCatch() returns the fitted object (or NULL when every attempt failed).
  # The result is stored after the call: an assignment to domain_results made
  # inside the error handler would only modify a copy local to that handler
  # function, so fallback fits used to be lost.
  domain_fit <- tryCatch({
    domain_anova <- ezANOVA(
      data = domain_data,
      dv = Difference,
      wid = pID,
      within = c(TimePerspective, Domain_Item),
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE
    )

    cat("ANOVA results for", domain_type, ":\n")
    print(domain_anova)

    domain_anova
  }, error = function(e) {
    cat("Error in ANOVA for", domain_type, ":", e$message, "\n")

    # Fallback to simpler analysis
    cat("Attempting simpler repeated measures ANOVA...\n")
    tryCatch({
      simple_anova <- ezANOVA(
        data = domain_data,
        dv = Difference,
        wid = pID,
        within = TimePerspective,
        between = c(GROUP, TASK_DO),
        type = 3,
        detailed = TRUE
      )
      print(simple_anova)
      simple_anova
    }, error = function(e2) {
      cat("Simple ANOVA also failed:", e2$message, "\n")
      NULL
    })
  })

  # Keep only successful fits so the list length reflects completed analyses
  if (!is.null(domain_fit)) {
    domain_results[[domain_type]] <- domain_fit
  }
}

# 4.3 Individual domain item analyses
# One mixed ANOVA per item (within: TimePerspective, between: GROUP x TASK_DO).
# If the ANOVA fails, a paired t-test of Past vs Future is used instead.
# Whichever analysis succeeds is stored in item_results under the item's name.
cat("\n4.3 Individual Domain Item Analyses:\n")
cat(paste(rep("-", 50), collapse = ""), "\n")

item_results <- list()

for (domain_item in as.character(unique(long_data_clean$Domain_Item))) {
  cat("\nAnalyzing individual item:", domain_item, "\n")

  item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ]

  # tryCatch() returns the analysis object (or NULL). The result is stored
  # after the call: an assignment to item_results made inside the error
  # handler would only modify a copy local to that handler function, so
  # fallback t-tests used to be lost and the final count under-reported.
  item_fit <- tryCatch({
    item_anova <- ezANOVA(
      data = item_data,
      dv = Difference,
      wid = pID,
      within = TimePerspective,
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE
    )

    cat("ANOVA results for", domain_item, ":\n")
    print(item_anova)

    item_anova
  }, error = function(e) {
    cat("Error in ANOVA for", domain_item, ":", e$message, "\n")

    # Fallback to paired t-test.
    # Past and Future values are matched on pID rather than by row position:
    # once rows with missing differences have been removed, the two vectors
    # can differ in length or line up different participants.
    # Assumes one row per participant, item and time perspective — TODO confirm.
    value_cols <- c("pID", "Difference")
    past_rows <- item_data[item_data$TimePerspective == "Past", value_cols]
    fut_rows <- item_data[item_data$TimePerspective == "Future", value_cols]
    paired <- merge(past_rows, fut_rows, by = "pID",
                    suffixes = c("_past", "_future"))

    if (nrow(paired) > 1) {
      t_test <- t.test(paired$Difference_past, paired$Difference_future,
                       paired = TRUE)
      cat("Fallback paired t-test for", domain_item, ":\n")
      cat("t =", round(t_test$statistic, 3),
          ", df =", t_test$parameter,
          ", p =", round(t_test$p.value, 5), "\n")

      t_test
    } else {
      NULL
    }
  })

  # Keep only successful analyses so length(item_results) is an honest count
  if (!is.null(item_fit)) {
    item_results[[domain_item]] <- item_fit
  }
}

# Closing banner and a short tally of what was analysed
cat("\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE!\n")
cat(strrep("=", 80), "\n")
cat("Summary:\n")
cat("- Total domains analyzed:", sum(!duplicated(long_data_clean$Domain_Item)), "\n")
cat("- Domain types analyzed:", sum(!duplicated(long_data_clean$Domain_Type)), "\n")
cat("- Individual item analyses completed:", length(item_results), "\n")