eohi/.history/mixed anova - domain means_20250912125012.r
2025-12-23 15:47:09 -05:00

581 lines
19 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
# NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(ggplot2) # For plotting
library(emmeans) # For post-hoc comparisons
# Load the experiment data from CSV (path is relative to the working directory)
data <- read.csv("eohi1/exp1.csv")

# Report the table size and the number of distinct participant IDs
cat("Dataset dimensions:", dim(data), "\n")
cat("Number of participants:", length(unique(data$pID)), "\n")

# Cross-tabulate the three condition columns
cat("\nExperimental conditions:\n")
print(table(data$GROUP, data$TEMPORAL_DO, data$ITEM_DO))

# List every column whose name contains one of the four domain-mean suffixes
cat("\nChecking available domain mean columns:\n")
domain_mean_cols <- grep(
  "mean_(pref|pers|val|life)",
  colnames(data),
  value = TRUE
)
print(domain_mean_cols)

# The eight columns this analysis depends on (Past/Future x four domains);
# report which of them, if any, are absent from the data
required_vars <- c(
  paste0("NPast_mean_", c("pref", "pers", "val", "life")),
  paste0("NFut_mean_", c("pref", "pers", "val", "life"))
)
missing_vars <- setdiff(required_vars, colnames(data))
if (length(missing_vars) == 0) {
  cat("All required domain mean variables found!\n")
} else {
  cat("Warning: Missing variables:", paste(missing_vars, collapse = ", "), "\n")
}
# =============================================================================
# STEP 1: DATA PIVOTING TO LONG FORMAT
# =============================================================================
cat("STEP 1: DATA PIVOTING TO LONG FORMAT\n")

# Lookup table with one row per wide-format column: the column name plus the
# TIME and DOMAIN labels that column maps to in the long-format data.
# Rows are ordered Past (4 domains) then Future (4 domains).
domain_mapping <- data.frame(
  variable = paste0(
    rep(c("NPast", "NFut"), each = 4),
    "_mean_",
    rep(c("pref", "pers", "val", "life"), times = 2)
  ),
  time = rep(c("Past", "Future"), each = 4),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), times = 2),
  stringsAsFactors = FALSE
)
cat("Domain mapping:\n")
print(domain_mapping)
# Reshape the wide domain-mean columns into long format.
#
# Arguments:
#   data            Wide data frame. Must contain the identifier columns pID,
#                   ResponseId, GROUP, TEMPORAL_DO and ITEM_DO. Columns named
#                   in domain_mapping$variable are stacked; any that are absent
#                   are reported and skipped.
#   domain_mapping  Data frame with columns `variable` (wide column name),
#                   `time` and `domain` (labels assigned to that column).
#
# Returns:
#   A data frame with the identifier columns followed by TIME, DOMAIN and
#   MEAN_DIFFERENCE, holding one row per (input row, mapped column) pair.
#   pID, GROUP, TEMPORAL_DO, ITEM_DO, TIME and DOMAIN are factors; TIME and
#   DOMAIN use fixed level orders. If nothing could be stacked (empty mapping
#   or no mapped column present) a zero-row data frame with the same columns
#   is returned.
#
# Stops with an error if any identifier column is missing from `data`.
pivot_domain_means <- function(data, domain_mapping) {
  id_cols <- c("pID", "ResponseId", "GROUP", "TEMPORAL_DO", "ITEM_DO")
  absent_ids <- setdiff(id_cols, colnames(data))
  if (length(absent_ids) > 0) {
    stop(
      "Identifier column(s) missing from data: ",
      paste(absent_ids, collapse = ", "),
      call. = FALSE
    )
  }

  # as.character() guards against a mapping built with stringsAsFactors = TRUE,
  # where indexing by the factor would use its integer codes.
  map_vars <- as.character(domain_mapping$variable)
  map_time <- as.character(domain_mapping$time)
  map_domain <- as.character(domain_mapping$domain)
  n_rows <- nrow(data)

  # Build one piece per mapped column, then bind once (instead of growing a
  # data frame with rbind() inside a loop). seq_len() makes an empty mapping
  # a no-op rather than iterating over c(1, 0).
  pieces <- lapply(seq_len(nrow(domain_mapping)), function(i) {
    var_name <- map_vars[i]
    if (!var_name %in% colnames(data)) {
      cat("Warning: Variable", var_name, "not found in data\n")
      return(NULL)
    }
    piece <- data[, id_cols, drop = FALSE]
    piece$TIME <- rep(map_time[i], n_rows)
    piece$DOMAIN <- rep(map_domain[i], n_rows)
    piece$MEAN_DIFFERENCE <- data[[var_name]]
    piece
  })
  pieces <- Filter(Negate(is.null), pieces)

  if (length(pieces) == 0L) {
    # Nothing to stack: return an empty frame with the expected columns
    long_data <- data[0, id_cols, drop = FALSE]
    long_data$TIME <- character(0)
    long_data$DOMAIN <- character(0)
    long_data$MEAN_DIFFERENCE <- numeric(0)
  } else {
    long_data <- do.call(rbind, pieces)
  }

  # Convert to factors with proper levels
  long_data$TIME <- factor(long_data$TIME, levels = c("Past", "Future"))
  long_data$DOMAIN <- factor(
    long_data$DOMAIN,
    levels = c("Preferences", "Personality", "Values", "Life")
  )
  long_data$pID <- as.factor(long_data$pID)
  long_data$GROUP <- as.factor(long_data$GROUP)
  long_data$TEMPORAL_DO <- as.factor(long_data$TEMPORAL_DO)
  long_data$ITEM_DO <- as.factor(long_data$ITEM_DO)
  long_data
}
# Run the pivot. On failure, report the underlying message and halt, since
# every later step needs long_data.
cat("\nPivoting data to long format...\n")
tryCatch(
  expr = {
    long_data <- pivot_domain_means(data, domain_mapping)
    cat("Data pivoting completed successfully.\n")
  },
  error = function(e) {
    cat("Error in data pivoting:", conditionMessage(e), "\n")
    stop("Cannot proceed without proper data structure")
  }
)

# Size of the long table and the levels of the two within-subjects factors
cat("Long format data dimensions:", dim(long_data), "\n")
cat("Unique participants:", length(unique(long_data$pID)), "\n")
cat("TIME levels:", toString(levels(long_data$TIME)), "\n")
cat("DOMAIN levels:", toString(levels(long_data$DOMAIN)), "\n")

# Confirm the columns have the types the ANOVA code expects
cat("\nData types check:\n")
invisible(lapply(c("TIME", "DOMAIN", "pID"), function(col_name) {
  cat(col_name, "is factor:", is.factor(long_data[[col_name]]), "\n")
}))
cat("MEAN_DIFFERENCE is numeric:", is.numeric(long_data$MEAN_DIFFERENCE), "\n")

# Structure dump and the first rows
cat("\nLong data structure:\n")
str(long_data)
cat("\nFirst 10 rows of long_data:\n")
print(utils::head(long_data, n = 10))

# All rows for the participant whose pID equals 1, as a worked example
cat("\nExample: Participant 1 across all domains and times:\n")
participant_1_data <- long_data[
  long_data$pID == 1,
  c("pID", "GROUP", "TEMPORAL_DO", "ITEM_DO", "TIME", "DOMAIN", "MEAN_DIFFERENCE")
]
print(participant_1_data)
# =============================================================================
# STEP 2: ASSUMPTION CHECKING
# =============================================================================
# 2.1 Check for missing values
# Count and percentage of missing MEAN_DIFFERENCE values per TIME x DOMAIN cell
cat("\n2.1 Missing Values Check:\n")
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = "drop"
  )
cat("Missing values by TIME and DOMAIN:\n")
print(missing_summary)

# Remove missing values.
# The repeated-measures ANOVAs below need every participant to have a value in
# every TIME x DOMAIN cell. Dropping only the individual NA rows would leave
# participants with an incomplete set of cells, so any participant with at
# least one missing value is excluded entirely.
incomplete_ids <- unique(long_data$pID[is.na(long_data$MEAN_DIFFERENCE)])
long_data_clean <- long_data[!long_data$pID %in% incomplete_ids, ]
if (is.factor(long_data_clean$pID)) {
  # Drop the now-unused participant levels so they are not treated as
  # participants with no data
  long_data_clean$pID <- droplevels(long_data_clean$pID)
}
cat("Participants excluded for incomplete data:", length(incomplete_ids), "\n")
cat("\nData after removing missing values:", dim(long_data_clean), "\n")
# 2.2 Outlier detection
# Per TIME x DOMAIN cell: basic moments, quartiles, and the number of
# observations falling outside the fences q1 - 1.5 * IQR and q3 + 1.5 * IQR
cat("\n2.2 Outlier Detection:\n")
outlier_summary <- summarise(
  group_by(long_data_clean, TIME, DOMAIN),
  n = n(),
  mean = mean(MEAN_DIFFERENCE),
  sd = sd(MEAN_DIFFERENCE),
  q1 = quantile(MEAN_DIFFERENCE, 0.25),
  q3 = quantile(MEAN_DIFFERENCE, 0.75),
  iqr = q3 - q1,
  lower_bound = q1 - 1.5 * iqr,
  upper_bound = q3 + 1.5 * iqr,
  # An outlier is any value that is NOT inside [lower_bound, upper_bound]
  n_outliers = sum(
    !(MEAN_DIFFERENCE >= lower_bound & MEAN_DIFFERENCE <= upper_bound)
  ),
  .groups = "drop"
)
cat("Outlier summary (IQR method):\n")
print(outlier_summary)
# 2.3 Normality tests
# Shapiro-Wilk and Anderson-Darling p-values per TIME x DOMAIN cell.
# A test is skipped (p-value NA) when the cell does not meet its requirements:
#   - shapiro.test() accepts between 3 and 5000 observations
#   - ad.test() needs more than 7 observations (n = 7 is rejected)
#   - a constant cell has zero variance, so neither test can be computed
# Plain if/else is used because the conditions are scalar within each group.
cat("\n2.3 Normality Tests:\n")
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    shapiro_p = if (n >= 3 && n <= 5000 &&
      length(unique(MEAN_DIFFERENCE)) > 1) {
      shapiro.test(MEAN_DIFFERENCE)$p.value
    } else {
      NA_real_
    },
    anderson_p = if (n > 7 && length(unique(MEAN_DIFFERENCE)) > 1) {
      ad.test(MEAN_DIFFERENCE)$p.value
    } else {
      NA_real_
    },
    .groups = "drop"
  ) %>%
  mutate(
    shapiro_normal = shapiro_p > 0.05,
    anderson_normal = anderson_p > 0.05,
    # Both tests must agree when both ran; otherwise use whichever ran
    overall_normal = case_when(
      !is.na(shapiro_p) & !is.na(anderson_p) ~ shapiro_normal & anderson_normal,
      !is.na(shapiro_p) ~ shapiro_normal,
      !is.na(anderson_p) ~ anderson_normal,
      TRUE ~ NA
    )
  )
cat("Normality test results:\n")
print(normality_results)
# 2.4 Homogeneity of variance (Levene's test)
# NOTE(review): TIME and DOMAIN are within-subjects factors, while
# leveneTest() compares the groups as if they were independent samples --
# confirm this is the intended check.
cat("\n2.4 Homogeneity of Variance Tests:\n")

# Within each DOMAIN: do the Past and Future variances differ?
homogeneity_time <- summarise(
  group_by(long_data_clean, DOMAIN),
  levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)[["Pr(>F)"]][1],
  homogeneous = levene_p > 0.05,
  .groups = "drop"
)
cat("Homogeneity of variance across TIME within each DOMAIN:\n")
print(homogeneity_time)

# Within each TIME: do the four domain variances differ?
homogeneity_domain <- summarise(
  group_by(long_data_clean, TIME),
  levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)[["Pr(>F)"]][1],
  homogeneous = levene_p > 0.05,
  .groups = "drop"
)
cat("Homogeneity of variance across DOMAIN within each TIME:\n")
print(homogeneity_domain)
# =============================================================================
# STEP 3: DESCRIPTIVE STATISTICS
# =============================================================================
cat("\n", strrep("=", 80), "\n")
cat("STEP 3: DESCRIPTIVE STATISTICS\n")
cat(strrep("=", 80), "\n")

# Summary of MEAN_DIFFERENCE for every TIME x DOMAIN cell, rounded to 5 places
desc_stats <- summarise(
  group_by(long_data_clean, TIME, DOMAIN),
  n = n(),
  mean = round(mean(MEAN_DIFFERENCE), digits = 5),
  sd = round(sd(MEAN_DIFFERENCE), digits = 5),
  median = round(median(MEAN_DIFFERENCE), digits = 5),
  q1 = round(quantile(MEAN_DIFFERENCE, 0.25), digits = 5),
  q3 = round(quantile(MEAN_DIFFERENCE, 0.75), digits = 5),
  min = round(min(MEAN_DIFFERENCE), digits = 5),
  max = round(max(MEAN_DIFFERENCE), digits = 5),
  .groups = "drop"
)
cat("Descriptive statistics by TIME and DOMAIN:\n")
print(desc_stats)

# The same cells further split by the between-subjects factor GROUP
desc_stats_by_group <- summarise(
  group_by(long_data_clean, GROUP, TIME, DOMAIN),
  n = n(),
  mean = round(mean(MEAN_DIFFERENCE), digits = 5),
  sd = round(sd(MEAN_DIFFERENCE), digits = 5),
  .groups = "drop"
)
cat("\nDescriptive statistics by GROUP, TIME, and DOMAIN:\n")
print(desc_stats_by_group)
# =============================================================================
# STEP 4: MIXED ANOVA ANALYSES
# =============================================================================
cat("\n", paste(rep("=", 80), collapse = ""), "\n")
cat("STEP 4: MIXED ANOVA ANALYSES\n")
cat(paste(rep("=", 80), collapse = ""), "\n")
# 4.1 Main Mixed ANOVA
cat("\n4.1 Main Mixed ANOVA:\n")
cat("Within-subjects factors: TIME, DOMAIN\n")
cat("Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n")
cat(paste(rep("-", 50), collapse = ""), "\n")

# Fit the full model; if it errors, fall back to GROUP as the only
# between-subjects factor. main_anova is always (re)assigned from the
# tryCatch() return value: it holds whichever model succeeded, or NULL when
# both fail, so later steps can never pick up a stale result from an earlier
# run. This also avoids assigning with `<<-` from inside the handler.
main_anova <- tryCatch({
  full_anova <- ezANOVA(
    data = long_data_clean,
    dv = MEAN_DIFFERENCE,
    wid = pID,
    within = c(TIME, DOMAIN),
    between = c(GROUP, TEMPORAL_DO, ITEM_DO),
    type = 3,
    detailed = TRUE,
    return_aov = TRUE
  )
  cat("Main ANOVA Results:\n")
  print(full_anova)
  # Check sphericity
  if (!is.null(full_anova$`Mauchly's Test for Sphericity`)) {
    cat("\nSphericity test results:\n")
    print(full_anova$`Mauchly's Test for Sphericity`)
  }
  full_anova
}, error = function(e) {
  cat("Error in main ANOVA:", e$message, "\n")
  # Try simpler model without all between-subjects factors
  cat("Attempting simpler model with only GROUP as between-subjects factor...\n")
  tryCatch({
    simple_anova <- ezANOVA(
      data = long_data_clean,
      dv = MEAN_DIFFERENCE,
      wid = pID,
      within = c(TIME, DOMAIN),
      between = GROUP,
      type = 3,
      detailed = TRUE,
      return_aov = TRUE
    )
    cat("Simplified ANOVA Results:\n")
    print(simple_anova)
    simple_anova
  }, error = function(e2) {
    cat("Simplified ANOVA also failed:", e2$message, "\n")
    NULL
  })
})
# 4.2 Domain-specific analyses
# For each DOMAIN, run a mixed ANOVA with TIME within subjects. If the full
# between-subjects model errors, retry with GROUP only.
cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
cat(paste(rep("-", 50), collapse = ""), "\n")
domain_results <- list()
for (domain in levels(long_data_clean$DOMAIN)) {
  cat("\nAnalyzing domain:", domain, "\n")
  domain_data <- long_data_clean[long_data_clean$DOMAIN == domain, ]
  # The fit is taken from the tryCatch() return value. An assignment to
  # domain_results made inside the error handler would only modify a copy
  # local to that handler function, so the fallback result would be lost.
  domain_fit <- tryCatch({
    domain_anova <- ezANOVA(
      data = domain_data,
      dv = MEAN_DIFFERENCE,
      wid = pID,
      within = TIME,
      between = c(GROUP, TEMPORAL_DO, ITEM_DO),
      type = 3,
      detailed = TRUE
    )
    cat("ANOVA results for", domain, ":\n")
    print(domain_anova)
    domain_anova
  }, error = function(e) {
    cat("Error in ANOVA for", domain, ":", e$message, "\n")
    # Fallback to simpler analysis
    cat("Attempting simpler repeated measures ANOVA...\n")
    tryCatch({
      simple_anova <- ezANOVA(
        data = domain_data,
        dv = MEAN_DIFFERENCE,
        wid = pID,
        within = TIME,
        between = GROUP,
        type = 3,
        detailed = TRUE
      )
      print(simple_anova)
      simple_anova
    }, error = function(e2) {
      cat("Simple ANOVA also failed:", e2$message, "\n")
      NULL
    })
  })
  # Store only successful fits; domains where both models failed are omitted
  if (!is.null(domain_fit)) {
    domain_results[[domain]] <- domain_fit
  }
}
# 4.3 Time-specific analyses
# For each TIME level, run a mixed ANOVA with DOMAIN within subjects. If the
# full between-subjects model errors, retry with GROUP only.
cat("\n4.3 Time-specific Mixed ANOVAs:\n")
cat(paste(rep("-", 50), collapse = ""), "\n")
time_results <- list()
for (time in levels(long_data_clean$TIME)) {
  cat("\nAnalyzing time:", time, "\n")
  time_data <- long_data_clean[long_data_clean$TIME == time, ]
  # The fit is taken from the tryCatch() return value. An assignment to
  # time_results made inside the error handler would only modify a copy
  # local to that handler function, so the fallback result would be lost.
  time_fit <- tryCatch({
    time_anova <- ezANOVA(
      data = time_data,
      dv = MEAN_DIFFERENCE,
      wid = pID,
      within = DOMAIN,
      between = c(GROUP, TEMPORAL_DO, ITEM_DO),
      type = 3,
      detailed = TRUE
    )
    cat("ANOVA results for", time, ":\n")
    print(time_anova)
    time_anova
  }, error = function(e) {
    cat("Error in ANOVA for", time, ":", e$message, "\n")
    # Fallback to simpler analysis
    cat("Attempting simpler repeated measures ANOVA...\n")
    tryCatch({
      simple_anova <- ezANOVA(
        data = time_data,
        dv = MEAN_DIFFERENCE,
        wid = pID,
        within = DOMAIN,
        between = GROUP,
        type = 3,
        detailed = TRUE
      )
      print(simple_anova)
      simple_anova
    }, error = function(e2) {
      cat("Simple ANOVA also failed:", e2$message, "\n")
      NULL
    })
  })
  # Store only successful fits; time levels where both models failed are omitted
  if (!is.null(time_fit)) {
    time_results[[time]] <- time_fit
  }
}
# =============================================================================
# STEP 5: POST-HOC ANALYSES
# =============================================================================
cat("\n", paste(rep("=", 80), collapse = ""), "\n")
cat("STEP 5: POST-HOC ANALYSES\n")
cat(paste(rep("=", 80), collapse = ""), "\n")
# 5.1 Follow-ups for significant effects (runs only if a main ANOVA exists)
if (exists("main_anova") && !is.null(main_anova)) {
  cat("\n5.1 Post-hoc comparisons for main effects:\n")
  # Check for significant main effects and interactions
  anova_table <- main_anova$ANOVA
  if ("TIME" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME"] < 0.05) {
    cat("Significant TIME main effect found. Computing pairwise comparisons...\n")
    # Paired t-test for the TIME effect.
    # Average over domains within each participant first, giving a
    # participant x TIME matrix. Each participant then contributes exactly one
    # Past/Future pair: pairing is by participant rather than by row position,
    # and the degrees of freedom reflect the number of participants.
    time_by_pid <- tapply(
      long_data_clean$MEAN_DIFFERENCE,
      list(long_data_clean$pID, long_data_clean$TIME),
      mean
    )
    # Keep only participants with a mean for both time points
    time_by_pid <- time_by_pid[complete.cases(time_by_pid), , drop = FALSE]
    if (nrow(time_by_pid) >= 2) {
      time_t_test <- t.test(
        time_by_pid[, "Past"],
        time_by_pid[, "Future"],
        paired = TRUE
      )
      cat("Paired t-test for TIME effect:\n")
      cat("t =", round(time_t_test$statistic, 5),
          ", df =", time_t_test$parameter,
          ", p =", round(time_t_test$p.value, 5), "\n")
      cat("Mean difference (Past - Future):", round(time_t_test$estimate, 5), "\n")
    }
  }
  if ("DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "DOMAIN"] < 0.05) {
    cat("Significant DOMAIN main effect found.\n")
    # Report the marginal mean per domain (no pairwise tests are run here)
    domain_means <- long_data_clean %>%
      group_by(DOMAIN) %>%
      summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = "drop")
    cat("Domain means:\n")
    print(domain_means)
  }
  if ("TIME:DOMAIN" %in% anova_table$Effect && anova_table$p[anova_table$Effect == "TIME:DOMAIN"] < 0.05) {
    cat("Significant TIME × DOMAIN interaction found.\n")
    # Report the cell means underlying the interaction
    interaction_means <- long_data_clean %>%
      group_by(TIME, DOMAIN) %>%
      summarise(mean_diff = mean(MEAN_DIFFERENCE), .groups = "drop")
    cat("TIME × DOMAIN interaction means:\n")
    print(interaction_means)
  }
}
# =============================================================================
# STEP 6: EFFECT SIZES
# =============================================================================
cat("\n", strrep("=", 80), "\n")
cat("STEP 6: EFFECT SIZES\n")
cat(strrep("=", 80), "\n")
if (exists("main_anova") && !is.null(main_anova)) {
  anova_table <- main_anova$ANOVA
  # Partial eta squared per effect: SSn / (SSn + SSd), rounded to 5 places
  anova_table$partial_eta_squared <- round(
    with(anova_table, SSn / (SSn + SSd)),
    digits = 5
  )
  cat("Effect sizes (partial eta squared):\n")
  effect_sizes <- anova_table[c("Effect", "partial_eta_squared")]
  print(effect_sizes)
}
# =============================================================================
# STEP 7: SUMMARY AND INTERPRETATION
# =============================================================================
cat("\n", strrep("=", 80), "\n")
cat("STEP 7: SUMMARY AND INTERPRETATION\n")
cat(strrep("=", 80), "\n")

# Design recap: sample size and the factors that entered the analysis
cat("Analysis Summary:\n")
cat("- Total participants:", length(unique(long_data_clean$pID)), "\n")
cat("- Total observations:", nrow(long_data_clean), "\n")
cat("- Within-subjects factors: TIME (Past vs Future), DOMAIN (Preferences, Personality, Values, Life)\n")
cat("- Between-subjects factors: GROUP, TEMPORAL_DO, ITEM_DO\n")
cat("- Dependent variable: Mean absolute differences in domain ratings\n")
cat("\nResearch Question:\n")
cat("Do participants rate changes in domains differently from past to now vs past to future?\n")

# Plain-language read-out of the main ANOVA table (skipped if no model was fitted)
if (exists("main_anova") && !is.null(main_anova)) {
  anova_table <- main_anova$ANOVA
  cat("\nKey Findings:\n")

  # Every effect with p below .05, listed with its p-value
  significant_effects <- with(anova_table, Effect[p < 0.05])
  if (length(significant_effects) == 0) {
    cat("No significant effects found at α = 0.05\n")
  } else {
    cat("Significant effects found:\n")
    for (effect in significant_effects) {
      p_val <- anova_table$p[anova_table$Effect == effect]
      cat("-", effect, "(p =", round(p_val, 5), ")\n")
    }
  }

  # TIME main effect
  if ("TIME" %in% anova_table$Effect) {
    time_p <- anova_table$p[anova_table$Effect == "TIME"]
    if (time_p >= 0.05) {
      cat("\nTIME Effect: No significant difference between past-to-now and past-to-future\n")
      cat("perspectives (p =", round(time_p, 5), ")\n")
    } else {
      cat("\nTIME Effect: Participants show different levels of change when comparing\n")
      cat("past-to-now vs past-to-future perspectives (p =", round(time_p, 5), ")\n")
    }
  }

  # DOMAIN main effect
  if ("DOMAIN" %in% anova_table$Effect) {
    domain_p <- anova_table$p[anova_table$Effect == "DOMAIN"]
    if (domain_p >= 0.05) {
      cat("\nDOMAIN Effect: No significant differences between domains in perceived change\n")
      cat("(p =", round(domain_p, 5), ")\n")
    } else {
      cat("\nDOMAIN Effect: Different domains show different levels of perceived change\n")
      cat("(p =", round(domain_p, 5), ")\n")
    }
  }

  # TIME x DOMAIN interaction
  if ("TIME:DOMAIN" %in% anova_table$Effect) {
    interaction_p <- anova_table$p[anova_table$Effect == "TIME:DOMAIN"]
    if (interaction_p >= 0.05) {
      cat("\nTIME × DOMAIN Interaction: No significant interaction between time perspective\n")
      cat("and domain (p =", round(interaction_p, 5), ")\n")
    } else {
      cat("\nTIME × DOMAIN Interaction: The effect of time perspective on perceived change\n")
      cat("varies across domains (p =", round(interaction_p, 5), ")\n")
    }
  }
}
cat("\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE!\n")
cat(strrep("=", 80), "\n")