eohi/.history/mixed anova_20250912124604.r
2025-12-23 15:47:09 -05:00

399 lines
14 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Mixed ANOVA Analysis for Past vs Future Differences
# EOHI Experiment Data Analysis
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
# Load the wide-format experiment data from disk
data <- read.csv("eohi1/exp1.csv")

# Report the size of the dataset and the number of distinct participant IDs
cat("Dataset dimensions:", nrow(data), ncol(data), "\n")
cat("Number of participants:", length(unique(data$pID)), "\n")

# Cross-tabulate the three design/ordering variables
cat("\nExperimental conditions:\n")
table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO)

# Collect the column names that start with the NPast / NFut prefixes
cat("\nChecking available columns with NPast and NFut prefixes:\n")
npast_cols <- grep("^NPast", colnames(data), value = TRUE)
nfut_cols <- grep("^NFut", colnames(data), value = TRUE)

# Show which columns were found, then how many of each
cat("NPast columns found:\n")
print(npast_cols)
cat("\nNFut columns found:\n")
print(nfut_cols)
cat("\nTotal NPast columns:", length(npast_cols), "\n")
cat("Total NFut columns:", length(nfut_cols), "\n")
# STEP 1: PROPER DATA RESHAPING
# Items grouped by the domain type they belong to; the list names become the
# domain_type labels and the list order fixes the row order of domain_info
domain_items <- list(
  Preferences = c("pref_read", "pref_music", "pref_tv", "pref_nap",
                  "pref_travel"),
  Personality = c("pers_extravert", "pers_critical", "pers_dependable",
                  "pers_anxious", "pers_complex"),
  Values = c("val_obey", "val_trad", "val_opinion", "val_performance",
             "val_justice"),
  Life_Satisfaction = c("life_ideal", "life_excellent", "life_satisfied",
                        "life_important", "life_change")
)

# Flatten the list into a two-column lookup table (one row per item)
domain_info <- data.frame(
  domain = unlist(domain_items, use.names = FALSE),
  domain_type = rep(names(domain_items), times = lengths(domain_items)),
  stringsAsFactors = FALSE
)

# Print the lookup table and the number of items per domain type
cat("\nDomain Information:\n")
print(domain_info)
cat("\nDomain type summary:\n")
print(table(domain_info$domain_type))
# Reshape every domain listed in `domain_info` from wide to long format.
#
# Arguments:
#   data        Wide data frame. Must contain the identifier/covariate columns
#               listed in `id_cols` below and, for each domain to be reshaped,
#               the columns `NPastDiff_<domain>` and `NFutDiff_<domain>`.
#   domain_info Data frame with columns `domain` (item name) and `domain_type`
#               (category label), one row per item.
#
# Returns:
#   A data frame holding the `id_cols` plus `TimePerspective`, `Difference`,
#   `Domain_Type` and `Domain_Item`. For each domain the "Past" rows come
#   first, then the "Future" rows; domains follow the order of `domain_info`.
#   `TimePerspective`, `Domain_Type`, `Domain_Item` and `pID` are factors.
#   Factor levels for the two domain columns are taken from `domain_info`
#   (in order of first appearance), so domains added to that table are kept
#   instead of silently turning into NA.
#   Domains whose columns are missing from `data` are reported with cat() and
#   skipped; if nothing can be reshaped a zero-row data frame is returned.
reshape_all_domains <- function(data, domain_info) {
  id_cols <- c("pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO",
               "ITEM_DO", "COC_DO", "demo_sex", "demo_age_1", "AOT_total",
               "CRT_correct")

  # as.character() guards against domain_info having been built with factors
  domain_names <- as.character(domain_info$domain)
  domain_types <- as.character(domain_info$domain_type)

  # Build the long-format rows for one source column / time perspective
  stack_period <- function(value_col, period, domain_name, domain_type) {
    piece <- data[, id_cols, drop = FALSE]
    piece$TimePerspective <- period
    piece$Difference <- data[[value_col]]
    piece$Domain_Type <- domain_type
    piece$Domain_Item <- domain_name
    piece
  }

  # Collect one piece per domain and bind once at the end; growing a data
  # frame with rbind() inside the loop copies it on every iteration
  pieces <- vector("list", length(domain_names))
  for (i in seq_along(domain_names)) {
    past_col <- paste0("NPastDiff_", domain_names[i])
    fut_col <- paste0("NFutDiff_", domain_names[i])

    # Skip domains that are not present in the data
    if (!all(c(past_col, fut_col) %in% colnames(data))) {
      cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
      next
    }

    pieces[[i]] <- rbind(
      stack_period(past_col, "Past", domain_names[i], domain_types[i]),
      stack_period(fut_col, "Future", domain_names[i], domain_types[i])
    )
  }

  # Skipped domains left NULL slots behind
  pieces <- Filter(Negate(is.null), pieces)
  all_long_data <- if (length(pieces) > 0) {
    do.call(rbind, pieces)
  } else {
    data.frame()
  }

  # Convert to factors with proper levels
  all_long_data$TimePerspective <- factor(
    all_long_data$TimePerspective,
    levels = c("Past", "Future")
  )
  all_long_data$Domain_Type <- factor(
    all_long_data$Domain_Type,
    levels = unique(domain_types)
  )
  all_long_data$Domain_Item <- factor(
    all_long_data$Domain_Item,
    levels = unique(domain_names)
  )
  all_long_data$pID <- as.factor(all_long_data$pID)

  all_long_data
}
# Reshape all data to long format and describe the result
cat("\nReshaping data to long format...\n")
long_data <- reshape_all_domains(data, domain_info)

# Counts reused by several messages below (computed once instead of per line)
n_items <- length(unique(long_data$Domain_Item))
n_participants <- length(unique(long_data$pID))

cat("Long format data dimensions:", dim(long_data), "\n")
cat("Unique domains:", n_items, "\n")
cat("Domain types:", paste(unique(long_data$Domain_Type), collapse = ", "), "\n")
cat("Domain type counts:\n")
print(table(long_data$Domain_Type))

# Display structure and sample of long_data
cat("\nLong data structure:\n")
str(long_data)
cat("\nFirst 10 rows of long_data:\n")
print(head(long_data, 10))
cat("\nColumn names:\n")
print(colnames(long_data))

# Show factor levels for domain variables
cat("\nDomain_Type factor levels:\n")
print(levels(long_data$Domain_Type))
cat("\nDomain_Item factor levels:\n")
print(levels(long_data$Domain_Item))
cat("\nTimePerspective factor levels:\n")
print(levels(long_data$TimePerspective))

# Columns shown in the two previews below
preview_cols <- c("pID", "GROUP", "TASK_DO", "TEMPORAL_DO", "Domain_Type",
                  "Domain_Item", "TimePerspective", "Difference")

# Show a sample with actual names instead of numbers.
# head() is used rather than 1:6 indexing so that a data set with fewer than
# six rows does not get padded with all-NA rows.
cat("\nSample data with actual names (first 6 rows):\n")
sample_data <- head(long_data[, preview_cols], 6)
print(sample_data)

# Show a better example - one participant across multiple domains.
# pID is a factor here, so compare its labels against "1" explicitly, and
# print only the ten rows the heading announces.
cat("\nExample: Participant 1 across multiple domains (first 10 rows):\n")
participant_1_data <- long_data[as.character(long_data$pID) == "1", preview_cols]
print(head(participant_1_data, 10))

# Show structure explanation
cat("\nLong format explanation:\n")
cat("- Each participant appears", n_items * 2, "times total\n")
cat("- (", n_items, "domains × 2 time perspectives)\n")
cat("- Total rows per participant:", n_items * 2, "\n")
cat("- Total participants:", n_participants, "\n")
cat("- Expected total rows:", n_participants * n_items * 2, "\n")
cat("- Actual total rows:", nrow(long_data), "\n")
# STEP 2: ASSUMPTION CHECKING
cat("STEP 2: CHECKING ASSUMPTIONS\n")
head(long_data)

# 2.1 Missing values: count and percentage of NA difference scores in every
# domain type x item x time perspective cell
cells_all <- group_by(long_data, Domain_Type, Domain_Item, TimePerspective)
missing_summary <- summarise(
  cells_all,
  n_total = n(),
  n_missing = sum(is.na(Difference)),
  pct_missing = round(100 * n_missing / n_total, 2),
  .groups = "drop"
)
cat("\nMissing values by domain and time perspective:\n")
print(missing_summary)

# Keep only the rows that have a difference score
long_data_clean <- long_data[complete.cases(long_data$Difference), ]
cat("\nData after removing missing values:", dim(long_data_clean), "\n")
# 2.2 Outlier detection
# For every cell, flag scores lying more than 1.5 * IQR outside the quartiles
cat("\nChecking for outliers...\n")
outlier_summary <- summarise(
  group_by(long_data_clean, Domain_Type, Domain_Item, TimePerspective),
  n = n(),
  mean = mean(Difference),
  sd = sd(Difference),
  q1 = quantile(Difference, 0.25),
  q3 = quantile(Difference, 0.75),
  iqr = q3 - q1,
  lower_bound = q1 - 1.5 * iqr,
  upper_bound = q3 + 1.5 * iqr,
  # Number of scores falling outside [lower_bound, upper_bound]
  n_outliers = sum(Difference < lower_bound | Difference > upper_bound),
  .groups = "drop"
)
cat("Outlier summary (IQR method):\n")
print(outlier_summary)
# 2.3 Normality tests
cat("\nTesting normality...\n")

# Run one normality test on `x` and return its p-value.
# Returns NA_real_ instead of calling the test when the sample size is outside
# [min_n, max_n] or when all values are identical, because the tests stop with
# an error in those situations.
normality_p <- function(x, test_fn, min_n, max_n = Inf) {
  n_obs <- length(x)
  if (n_obs < min_n || n_obs > max_n || length(unique(x)) < 2L) {
    return(NA_real_)
  }
  test_fn(x)$p.value
}

normality_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    # shapiro.test() accepts between 3 and 5000 observations
    shapiro_p = normality_p(Difference, shapiro.test, min_n = 3, max_n = 5000),
    # nortest::ad.test() needs more than 7 observations, i.e. at least 8
    anderson_p = normality_p(Difference, ad.test, min_n = 8),
    .groups = "drop"
  ) %>%
  mutate(
    shapiro_normal = shapiro_p > 0.05,
    anderson_normal = anderson_p > 0.05,
    # A cell counts as normal only if every test that could be run agrees
    overall_normal = case_when(
      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
      !is.na(shapiro_p) ~ shapiro_normal,
      !is.na(anderson_p) ~ anderson_normal,
      TRUE ~ NA
    )
  )
cat("Normality test results:\n")
print(normality_results)
# 2.4 Homogeneity of variance (Levene's test)
# One test per item, comparing the two time perspectives
cat("\nTesting homogeneity of variance...\n")
items_grouped <- group_by(long_data_clean, Domain_Type, Domain_Item)
homogeneity_results <- summarise(
  items_grouped,
  # First entry of the Pr(>F) column is the p-value for the grouping factor
  levene_p = leveneTest(Difference ~ TimePerspective)$`Pr(>F)`[1],
  homogeneous = levene_p > 0.05,
  .groups = "drop"
)
cat("Homogeneity of variance results:\n")
print(homogeneity_results)
# STEP 3: DESCRIPTIVE STATISTICS
cat("\n", strrep("=", 80), "\n")
cat("STEP 3: DESCRIPTIVE STATISTICS\n")
cat(strrep("=", 80), "\n")

# Location and spread of the difference scores in every
# domain type x item x time perspective cell
desc_stats <- summarise(
  group_by(long_data_clean, Domain_Type, Domain_Item, TimePerspective),
  n = n(),
  mean = mean(Difference),
  sd = sd(Difference),
  median = median(Difference),
  q1 = quantile(Difference, 0.25),
  q3 = quantile(Difference, 0.75),
  min = min(Difference),
  max = max(Difference),
  .groups = "drop"
)
cat("Descriptive statistics:\n")
print(desc_stats)
# STEP 4: MIXED ANOVA ANALYSES
cat("\n", strrep("=", 80), "\n")
cat("STEP 4: MIXED ANOVA ANALYSES\n")
cat(strrep("=", 80), "\n")

# 4.1 Overall analysis across all domains
cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n")
cat(strrep("-", 50), "\n")

# Handler for a failed overall model: report the message and carry on
report_overall_failure <- function(e) {
  cat("Error in overall ANOVA:", e$message, "\n")
}

tryCatch(
  {
    # Within-subject: time perspective and domain type;
    # between-subject: GROUP and TASK_DO
    overall_anova <- ezANOVA(
      data = long_data_clean,
      dv = Difference,
      wid = pID,
      within = c(TimePerspective, Domain_Type),
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE,
      return_aov = TRUE
    )
    cat("Overall ANOVA Results:\n")
    print(overall_anova)

    # Print the sphericity table on its own when the result contains one
    if (!is.null(overall_anova$`Mauchly's Test for Sphericity`)) {
      cat("\nSphericity test results:\n")
      print(overall_anova$`Mauchly's Test for Sphericity`)
    }
  },
  error = report_overall_failure
)
# 4.2 Domain-specific analyses
# One mixed ANOVA per domain type. If the full model (time perspective x item
# within subjects) fails, a simpler model with time perspective as the only
# within-subject factor is attempted. Whichever model succeeds is stored in
# domain_results under the domain type's name.
cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
cat(paste(rep("-", 50), collapse = ""), "\n")
domain_results <- list()
for (domain_type in unique(long_data_clean$Domain_Type)) {
  cat("\nAnalyzing domain type:", domain_type, "\n")
  # droplevels() removes factor levels that do not occur in this subset, so
  # Domain_Item only carries the items belonging to this domain type
  domain_data <- droplevels(
    long_data_clean[long_data_clean$Domain_Type == domain_type, ]
  )

  # tryCatch() RETURNS the fitted model (or NULL). Assigning into
  # domain_results from inside the error handler would only modify a copy
  # local to the handler function, so the fallback result would be lost.
  domain_fit <- tryCatch({
    domain_anova <- ezANOVA(
      data = domain_data,
      dv = Difference,
      wid = pID,
      within = c(TimePerspective, Domain_Item),
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE
    )
    cat("ANOVA results for", domain_type, ":\n")
    print(domain_anova)
    domain_anova
  }, error = function(e) {
    cat("Error in ANOVA for", domain_type, ":", e$message, "\n")
    # Fallback to simpler analysis
    cat("Attempting simpler repeated measures ANOVA...\n")
    tryCatch({
      simple_anova <- ezANOVA(
        data = domain_data,
        dv = Difference,
        wid = pID,
        within = TimePerspective,
        between = c(GROUP, TASK_DO),
        type = 3,
        detailed = TRUE
      )
      print(simple_anova)
      simple_anova
    }, error = function(e2) {
      cat("Simple ANOVA also failed:", e2$message, "\n")
      NULL
    })
  })

  # Store only models that were actually fitted
  if (!is.null(domain_fit)) {
    domain_results[[domain_type]] <- domain_fit
  }
}
# 4.3 Individual domain item analyses
# One mixed ANOVA per item (time perspective within subjects, GROUP and
# TASK_DO between subjects). If the ANOVA fails, a paired t-test of Past vs
# Future is used instead. Whichever analysis succeeds is stored in
# item_results under the item's name.
cat("\n4.3 Individual Domain Item Analyses:\n")
cat(paste(rep("-", 50), collapse = ""), "\n")
item_results <- list()
for (domain_item in unique(long_data_clean$Domain_Item)) {
  cat("\nAnalyzing individual item:", domain_item, "\n")
  item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ]

  # tryCatch() RETURNS the result (or NULL). Assigning into item_results from
  # inside the error handler would only modify a copy local to the handler
  # function, so fallback results would never reach the summary below.
  item_fit <- tryCatch({
    item_anova <- ezANOVA(
      data = item_data,
      dv = Difference,
      wid = pID,
      within = TimePerspective,
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE
    )
    cat("ANOVA results for", domain_item, ":\n")
    print(item_anova)
    item_anova
  }, error = function(e) {
    cat("Error in ANOVA for", domain_item, ":", e$message, "\n")
    # Fallback to paired t-test.
    # Missing scores were removed row by row, so the Past and Future vectors
    # may differ in length or order. Match them on pID so each pair really
    # belongs to the same participant.
    past_rows <- item_data[item_data$TimePerspective == "Past",
                           c("pID", "Difference")]
    fut_rows <- item_data[item_data$TimePerspective == "Future",
                          c("pID", "Difference")]
    paired <- merge(past_rows, fut_rows, by = "pID",
                    suffixes = c("_past", "_future"))

    # A paired test needs at least two complete pairs
    if (nrow(paired) < 2) {
      return(NULL)
    }
    t_test <- t.test(paired$Difference_past, paired$Difference_future,
                     paired = TRUE)
    cat("Fallback paired t-test for", domain_item, ":\n")
    cat("t =", round(t_test$statistic, 3),
        ", df =", t_test$parameter,
        ", p =", round(t_test$p.value, 5), "\n")
    t_test
  })

  # Store only analyses that actually produced a result
  if (!is.null(item_fit)) {
    item_results[[domain_item]] <- item_fit
  }
}
# Closing banner and a short tally of what was analysed
cat("\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE!\n")
cat(strrep("=", 80), "\n")
cat("Summary:\n")
cat(
  "- Total domains analyzed:",
  length(unique(long_data_clean$Domain_Item)),
  "\n"
)
cat(
  "- Domain types analyzed:",
  length(unique(long_data_clean$Domain_Type)),
  "\n"
)
# Number of per-item results stored in item_results
cat("- Individual item analyses completed:", length(item_results), "\n")