# Mixed ANOVA Analysis for Past vs Future Differences
# EOHI Experiment Data Analysis

# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests

# Read the data
# NOTE(review): the path is relative, so the script presumably has to be run
# from the directory that contains `eohi1/` -- confirm.
data <- read.csv("eohi1/exp1.csv")

# Display basic information about the dataset
cat("Dataset dimensions:", dim(data), "\n")
cat("Number of participants:", length(unique(data$pID)), "\n")

# Check experimental conditions
# The table is wrapped in print() so that it is also displayed when the
# script is run through source(), which does not auto-print top-level values.
cat("\nExperimental conditions:\n")
print(table(data$GROUP, data$TASK_DO, data$TEMPORAL_DO))

# STEP 1: PROPER DATA RESHAPING
# Lookup table pairing every questionnaire item (`domain`) with the category
# it belongs to (`domain_type`). Items are listed per category and flattened
# in that order, so rows 1-5 are Preferences, 6-10 Personality, and so on.
domain_info <- local({
  items_by_type <- list(
    Preferences = c(
      "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel"
    ),
    Personality = c(
      "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious",
      "pers_complex"
    ),
    Values = c(
      "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
    ),
    Life_Satisfaction = c(
      "life_ideal", "life_excellent", "life_satisfied", "life_important",
      "life_change"
    )
  )

  data.frame(
    domain = unlist(items_by_type, use.names = FALSE),
    domain_type = rep(names(items_by_type), times = lengths(items_by_type)),
    stringsAsFactors = FALSE
  )
})

# Show the lookup table and how many items each category contains
cat("\nDomain Information:\n")
print(domain_info)
cat("\nDomain type summary:\n")
print(table(domain_info[["domain_type"]]))

#' Reshape the wide questionnaire data into long format
#'
#' For every row of `domain_info`, the columns `NPastDiff_<domain>` and
#' `NFutDiff_<domain>` of `data` are stacked into a single `Difference`
#' column and labelled with `TimePerspective` ("Past" / "Future"),
#' `Domain_Type` and `Domain_Item`. Domains whose columns are absent from
#' `data` are reported on the console and skipped.
#'
#' @param data Wide data frame containing the identifier/covariate columns
#'   listed in `id_cols` below plus the `NPastDiff_*` / `NFutDiff_*` columns.
#' @param domain_info Data frame with columns `domain` (item name) and
#'   `domain_type` (category the item belongs to).
#' @return A data frame with the identifier columns followed by
#'   `TimePerspective`, `Difference`, `Domain_Type` and `Domain_Item`; one
#'   row per input row, reshaped domain and time perspective. `pID`,
#'   `TimePerspective`, `Domain_Type` and `Domain_Item` are factors, and the
#'   levels of the last two follow the order of `domain_info`.
reshape_all_domains <- function(data, domain_info) {
  id_cols <- c(
    "pID", "ResponseId", "GROUP", "TASK_DO", "TEMPORAL_DO", "ITEM_DO",
    "COC_DO", "demo_sex", "demo_age_1", "AOT_total", "CRT_correct"
  )
  missing_ids <- setdiff(id_cols, colnames(data))
  if (length(missing_ids) > 0) {
    stop(
      "Required columns not found in data: ",
      paste(missing_ids, collapse = ", "),
      call. = FALSE
    )
  }

  id_data <- data[id_cols]
  n_rows <- nrow(data)
  domains <- as.character(domain_info$domain)
  domain_types <- as.character(domain_info$domain_type)

  # Long-format rows for one measurement column of one domain. rep_len()
  # keeps the assignment valid even when `data` has zero rows.
  stack_one <- function(value_col, time_label, domain_name, domain_type) {
    out <- id_data
    out$TimePerspective <- rep_len(time_label, n_rows)
    out$Difference <- data[[value_col]]
    out$Domain_Type <- rep_len(domain_type, n_rows)
    out$Domain_Item <- rep_len(domain_name, n_rows)
    out
  }

  # One piece per domain, bound together once at the end instead of growing
  # a data frame with rbind() on every iteration.
  pieces <- lapply(seq_len(nrow(domain_info)), function(i) {
    past_col <- paste0("NPastDiff_", domains[i])
    fut_col <- paste0("NFutDiff_", domains[i])

    # Check if columns exist
    if (!all(c(past_col, fut_col) %in% colnames(data))) {
      cat("Warning: Columns", past_col, "or", fut_col, "not found\n")
      return(NULL)
    }

    rbind(
      stack_one(past_col, "Past", domains[i], domain_types[i]),
      stack_one(fut_col, "Future", domains[i], domain_types[i])
    )
  })
  pieces <- Filter(Negate(is.null), pieces)

  if (length(pieces) == 0) {
    stop(
      "None of the domains in domain_info has both NPastDiff_* and ",
      "NFutDiff_* columns in data",
      call. = FALSE
    )
  }

  all_long_data <- do.call(rbind, pieces)
  rownames(all_long_data) <- NULL

  # Convert to factors. The levels come from domain_info, so a custom
  # domain table is labelled correctly instead of turning into NA.
  all_long_data$TimePerspective <- factor(
    all_long_data$TimePerspective,
    levels = c("Past", "Future")
  )
  all_long_data$Domain_Type <- factor(
    all_long_data$Domain_Type,
    levels = unique(domain_types)
  )
  all_long_data$Domain_Item <- factor(
    all_long_data$Domain_Item,
    levels = unique(domains)
  )
  all_long_data$pID <- as.factor(all_long_data$pID)

  all_long_data
}

# Build the long-format data set from the wide data and the domain table
cat("\nReshaping data to long format...\n")
long_data <- reshape_all_domains(data = data, domain_info = domain_info)

# Size of the result and which items / categories it contains
cat("Long format data dimensions:", dim(long_data), "\n")
cat("Unique domains:", length(unique(long_data[["Domain_Item"]])), "\n")
cat(
  "Domain types:",
  paste(unique(long_data[["Domain_Type"]]), collapse = ", "),
  "\n"
)
cat("Domain type counts:\n")
print(table(long_data[["Domain_Type"]]))

# Column types and the first rows of long_data
cat("\nLong data structure:\n")
str(long_data)

cat("\nFirst 10 rows of long_data:\n")
print(head(long_data, n = 10))

cat("\nColumn names:\n")
print(names(long_data))

# Levels of the three design factors
cat("\nDomain_Type factor levels:\n")
print(levels(long_data[["Domain_Type"]]))

cat("\nDomain_Item factor levels:\n")
print(levels(long_data[["Domain_Item"]]))

cat("\nTimePerspective factor levels:\n")
print(levels(long_data[["TimePerspective"]]))

# First six rows restricted to the design columns and the outcome
cat("\nSample data with actual names (first 6 rows):\n")
sample_data <- long_data[
  seq_len(6),
  c("pID", "Domain_Type", "Domain_Item", "TimePerspective", "Difference")
]
print(sample_data)

# STEP 2: ASSUMPTION CHECKING
cat("\n", strrep("=", 80), "\n")
cat("STEP 2: CHECKING ASSUMPTIONS\n")
cat(strrep("=", 80), "\n")
# Printed explicitly: a bare head() call displays nothing when the script is
# run through source().
print(head(long_data))

# 2.1 Check for missing values
# Count and percentage of missing Difference scores in every
# Domain_Type x Domain_Item x TimePerspective cell.
missing_summary <- long_data %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(Difference)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = "drop"
  )

cat("\nMissing values by domain and time perspective:\n")
print(missing_summary)

# Remove missing values
# Rows are dropped one by one, so a participant can keep only one of the
# Past / Future scores of an item.
long_data_clean <- long_data[!is.na(long_data$Difference), ]
cat("\nData after removing missing values:", dim(long_data_clean), "\n")

# 2.2 Outlier detection
cat("\nChecking for outliers...\n")
# Per cell: basic moments, quartiles, and the number of scores lying more
# than 1.5 * IQR below the first or above the third quartile.
outlier_summary <- summarise(
  group_by(long_data_clean, Domain_Type, Domain_Item, TimePerspective),
  n = n(),
  mean = mean(Difference),
  sd = sd(Difference),
  q1 = quantile(Difference, 0.25),
  q3 = quantile(Difference, 0.75),
  iqr = q3 - q1,
  lower_bound = q1 - 1.5 * iqr,
  upper_bound = q3 + 1.5 * iqr,
  # "Not inside the fences" is the same set as "below lower or above upper".
  n_outliers = sum(
    !(Difference >= lower_bound & Difference <= upper_bound)
  ),
  .groups = "drop"
)

cat("Outlier summary (IQR method):\n")
print(outlier_summary)

# 2.3 Normality tests
cat("\nTesting normality...\n")

# p-value of a normality test, or NA when the test cannot be run on `x`:
# sample size outside [min_n, max_n], or no variation in the values (both
# shapiro.test() and ad.test() fail on constant input).
normality_p_value <- function(x, test_fun, min_n, max_n = Inf) {
  n_obs <- length(x)
  if (n_obs < min_n || n_obs > max_n || length(unique(x)) < 2) {
    return(NA_real_)
  }
  test_fun(x)$p.value
}

normality_results <- long_data_clean %>%
  group_by(Domain_Type, Domain_Item, TimePerspective) %>%
  summarise(
    n = n(),
    # shapiro.test() accepts between 3 and 5000 observations.
    shapiro_p = normality_p_value(
      Difference, shapiro.test,
      min_n = 3, max_n = 5000
    ),
    # nortest::ad.test() stops unless the sample size is greater than 7,
    # so the smallest admissible n is 8 (not 7).
    anderson_p = normality_p_value(Difference, ad.test, min_n = 8),
    .groups = "drop"
  ) %>%
  mutate(
    shapiro_normal = shapiro_p > 0.05,
    anderson_normal = anderson_p > 0.05,
    # A cell counts as normal only if every test that could be run agrees;
    # NA when neither test was applicable.
    overall_normal = case_when(
      !is.na(shapiro_p) & !is.na(anderson_p) ~
        shapiro_normal & anderson_normal,
      !is.na(shapiro_p) ~ shapiro_normal,
      !is.na(anderson_p) ~ anderson_normal,
      TRUE ~ NA
    )
  )

cat("Normality test results:\n")
print(normality_results)

# 2.4 Homogeneity of variance (Levene's test)
cat("\nTesting homogeneity of variance...\n")
# One Levene test per item, comparing the Past and Future scores; the first
# entry of the "Pr(>F)" column is the p-value of the test.
homogeneity_results <- summarise(
  group_by(long_data_clean, Domain_Type, Domain_Item),
  levene_p = leveneTest(Difference ~ TimePerspective)[["Pr(>F)"]][1],
  homogeneous = levene_p > 0.05,
  .groups = "drop"
)

cat("Homogeneity of variance results:\n")
print(homogeneity_results)

# STEP 3: DESCRIPTIVE STATISTICS
cat("\n", strrep("=", 80), "\n")
cat("STEP 3: DESCRIPTIVE STATISTICS\n")
cat(strrep("=", 80), "\n")

# Location and spread of the Difference scores in every
# Domain_Type x Domain_Item x TimePerspective cell.
desc_stats <- summarise(
  group_by(long_data_clean, Domain_Type, Domain_Item, TimePerspective),
  n = n(),
  mean = mean(Difference),
  sd = sd(Difference),
  median = median(Difference),
  q1 = quantile(Difference, 0.25),
  q3 = quantile(Difference, 0.75),
  min = min(Difference),
  max = max(Difference),
  .groups = "drop"
)

cat("Descriptive statistics:\n")
print(desc_stats)

# STEP 4: MIXED ANOVA ANALYSES
cat("\n", strrep("=", 80), "\n")
cat("STEP 4: MIXED ANOVA ANALYSES\n")
cat(strrep("=", 80), "\n")

# 4.1 Overall analysis across all domains
cat("\n4.1 Overall Mixed ANOVA (all domains combined):\n")
cat(strrep("-", 50), "\n")

# TimePerspective and Domain_Type are within-subject factors, GROUP and
# TASK_DO between-subject factors. A failure is reported on the console
# instead of stopping the script.
tryCatch(
  expr = {
    overall_anova <- ezANOVA(
      data = long_data_clean,
      dv = Difference,
      wid = pID,
      within = c(TimePerspective, Domain_Type),
      between = c(GROUP, TASK_DO),
      type = 3,
      detailed = TRUE,
      return_aov = TRUE
    )

    cat("Overall ANOVA Results:\n")
    print(overall_anova)

    # Show the sphericity test separately when the result contains one
    if (!is.null(overall_anova[["Mauchly's Test for Sphericity"]])) {
      cat("\nSphericity test results:\n")
      print(overall_anova[["Mauchly's Test for Sphericity"]])
    }
  },
  error = function(e) {
    cat("Error in overall ANOVA:", e$message, "\n")
  }
)

# 4.2 Domain-specific analyses
cat("\n4.2 Domain-specific Mixed ANOVAs:\n")
cat(strrep("-", 50), "\n")

# Fitted model per domain type, keyed by the domain type name. Domain types
# for which both the full and the simpler model fail get no entry.
domain_results <- list()

for (domain_type in unique(long_data_clean$Domain_Type)) {
  cat("\nAnalyzing domain type:", domain_type, "\n")

  domain_data <- long_data_clean[long_data_clean$Domain_Type == domain_type, ]

  # Error handlers are functions: assigning to `domain_results` inside one
  # would only change a local copy and the fallback result would be lost.
  # Every branch therefore returns its fit (or NULL), and the result is
  # stored once, at loop level.
  domain_fit <- tryCatch(
    {
      domain_anova <- ezANOVA(
        data = domain_data,
        dv = Difference,
        wid = pID,
        within = c(TimePerspective, Domain_Item),
        between = c(GROUP, TASK_DO),
        type = 3,
        detailed = TRUE
      )

      cat("ANOVA results for", domain_type, ":\n")
      print(domain_anova)
      domain_anova
    },
    error = function(e) {
      cat("Error in ANOVA for", domain_type, ":", conditionMessage(e), "\n")

      # Fallback to simpler analysis: TimePerspective is the only
      # within-subject factor.
      cat("Attempting simpler repeated measures ANOVA...\n")
      tryCatch(
        {
          simple_anova <- ezANOVA(
            data = domain_data,
            dv = Difference,
            wid = pID,
            within = TimePerspective,
            between = c(GROUP, TASK_DO),
            type = 3,
            detailed = TRUE
          )
          print(simple_anova)
          simple_anova
        },
        error = function(e2) {
          cat("Simple ANOVA also failed:", conditionMessage(e2), "\n")
          NULL
        }
      )
    }
  )

  if (!is.null(domain_fit)) {
    domain_results[[domain_type]] <- domain_fit
  }
}

# 4.3 Individual domain item analyses
cat("\n4.3 Individual Domain Item Analyses:\n")
cat(strrep("-", 50), "\n")

# Result per item, keyed by item name: the ezANOVA fit, or the fallback
# paired t-test when the ANOVA fails. Items where both fail get no entry.
item_results <- list()

for (domain_item in unique(long_data_clean$Domain_Item)) {
  cat("\nAnalyzing individual item:", domain_item, "\n")

  item_data <- long_data_clean[long_data_clean$Domain_Item == domain_item, ]

  # Error handlers are functions: assigning to `item_results` inside one
  # would only change a local copy and the fallback result would be lost.
  # Every branch therefore returns its result (or NULL), which is stored
  # once, at loop level.
  item_fit <- tryCatch(
    {
      item_anova <- ezANOVA(
        data = item_data,
        dv = Difference,
        wid = pID,
        within = TimePerspective,
        between = c(GROUP, TASK_DO),
        type = 3,
        detailed = TRUE
      )

      cat("ANOVA results for", domain_item, ":\n")
      print(item_anova)
      item_anova
    },
    error = function(e) {
      cat("Error in ANOVA for", domain_item, ":", conditionMessage(e), "\n")

      # Fallback to paired t-test. Rows with a missing score were removed
      # one by one, so the Past and Future vectors may differ in length or
      # order; the scores are matched on pID and only participants with
      # both scores are used.
      # NOTE(review): assumes one row per pID, item and time perspective --
      # confirm that pID is unique in the raw data.
      score_cols <- c("pID", "Difference")
      past_scores <- item_data[item_data$TimePerspective == "Past", score_cols]
      fut_scores <- item_data[item_data$TimePerspective == "Future", score_cols]
      paired_scores <- merge(
        past_scores, fut_scores,
        by = "pID", suffixes = c("_past", "_fut")
      )

      if (nrow(paired_scores) < 2) {
        return(NULL)
      }

      tryCatch(
        {
          t_test <- t.test(
            paired_scores$Difference_past,
            paired_scores$Difference_fut,
            paired = TRUE
          )
          cat("Fallback paired t-test for", domain_item, ":\n")
          cat(
            "t =", round(t_test$statistic, 3),
            ", df =", t_test$parameter,
            ", p =", round(t_test$p.value, 5), "\n"
          )
          t_test
        },
        error = function(e2) {
          # e.g. all paired differences are identical
          cat("Fallback paired t-test also failed:", conditionMessage(e2), "\n")
          NULL
        }
      )
    }
  )

  if (!is.null(item_fit)) {
    item_results[[domain_item]] <- item_fit
  }
}

# Closing banner with the number of items, domain types and stored item
# results
cat("\n", strrep("=", 80), "\n")
cat("ANALYSIS COMPLETE!\n")
cat(strrep("=", 80), "\n")
cat("Summary:\n")
cat(
  "- Total domains analyzed:",
  length(unique(long_data_clean[["Domain_Item"]])),
  "\n"
)
cat(
  "- Domain types analyzed:",
  length(unique(long_data_clean[["Domain_Type"]])),
  "\n"
)
cat("- Individual item analyses completed:", length(item_results), "\n")