eohi/.history/eohi2/mixed anova - DGEN_20251003150009.r
2025-12-23 15:47:09 -05:00

169 lines
6.5 KiB
R
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Mixed ANOVA Analysis for DGEN Variables
# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Session-wide settings, applied in a single options() call:
#   scipen = 999 -> print numbers in fixed rather than scientific notation
#   contrasts    -> sum-to-zero coding for unordered factors and polynomial
#                   coding for ordered factors (needed for the mixed ANOVA)
options(
  scipen = 999,
  contrasts = c("contr.sum", "contr.poly")
)
# NOTE(review): absolute, machine-specific path; the relative read of
# "eohi2.csv" that follows depends on this working directory.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (path is relative to the current working directory)
data <- read.csv("eohi2.csv")
# Display basic information about the dataset
# The separator is " x " (spaces on both sides) so output reads "rows x cols".
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$pID))))
# The twelve DGEN columns the analysis needs, ordered past before future,
# 5 before 10 within each time, and Pref / Pers / Val within each interval.
required_vars <- c(
  "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
  "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
  "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
  "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"
)
# Fail fast: every later step selects these columns with all_of(), so
# continuing without them would only fail further down with a less direct
# error message.
missing_vars <- setdiff(required_vars, colnames(data))
if (length(missing_vars) > 0) {
  stop(
    "Missing variables: ", paste(missing_vars, collapse = ", "),
    call. = FALSE
  )
}
print("All required DGEN variables found!")
# Lookup table tying each DGEN column to its within-subjects design cell.
# The labels follow the order of required_vars: six Past items then six
# Future items; within each TIME, three 5-year items then three 10-year
# items; within each interval, Preferences, Personality, Values.
variable_mapping <- data.frame(
  variable = required_vars,
  TIME = rep(c("Past", "Future"), each = 6),
  DOMAIN = rep(c("Preferences", "Personality", "Values"), times = 4),
  INTERVAL = rep(rep(c("5", "10"), each = 3), times = 2),
  stringsAsFactors = FALSE
)
# Show the mapping so it can be checked by eye
print("Variable mapping:")
print(variable_mapping)
# Reshape to long format: one row per participant x DGEN item, labelled with
# its TIME / DOMAIN / INTERVAL cell via variable_mapping.
long_data <- data %>%
  select(pID, ResponseId, temporal_DO, interval_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "DGEN_SCORE"
  ) %>%
  left_join(variable_mapping, by = "variable") %>%
  # Convert to factors with explicit level order
  mutate(
    TIME = factor(TIME, levels = c("Past", "Future")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
    INTERVAL = factor(INTERVAL, levels = c("5", "10")),
    pID = as.factor(pID),
    temporal_DO = as.factor(temporal_DO),
    interval_DO = as.factor(interval_DO)
  ) %>%
  # Keep the analysis columns and drop rows with a missing score.
  # NOTE: dropping at row level means a participant can remain with fewer
  # than 12 rows (an incomplete within-subjects design).
  select(
    pID, ResponseId, temporal_DO, interval_DO,
    TIME, DOMAIN, INTERVAL, DGEN_SCORE
  ) %>%
  filter(!is.na(DGEN_SCORE))
# The separator is " x " (spaces on both sides) so output reads "rows x cols".
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
print(paste("Number of participants:", length(unique(long_data$pID))))
print("Factor levels:")
# One line per factor, formatted as "<name>: <level1>, <level2>, ..."
walk(
  c("TIME", "DOMAIN", "INTERVAL", "temporal_DO", "interval_DO"),
  function(factor_name) {
    print(paste0(
      factor_name, ": ",
      paste(levels(long_data[[factor_name]]), collapse = ", ")
    ))
  }
)
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Summary of DGEN_SCORE within each TIME x DOMAIN x INTERVAL cell; every
# statistic is rounded to 5 decimal places.
desc_stats <- summarise(
  group_by(long_data, TIME, DOMAIN, INTERVAL),
  n = n(),
  mean = round(mean(DGEN_SCORE, na.rm = TRUE), digits = 5),
  variance = round(var(DGEN_SCORE, na.rm = TRUE), digits = 5),
  sd = round(sd(DGEN_SCORE, na.rm = TRUE), digits = 5),
  median = round(median(DGEN_SCORE, na.rm = TRUE), digits = 5),
  q1 = round(quantile(DGEN_SCORE, probs = 0.25, na.rm = TRUE), digits = 5),
  q3 = round(quantile(DGEN_SCORE, probs = 0.75, na.rm = TRUE), digits = 5),
  min = round(min(DGEN_SCORE, na.rm = TRUE), digits = 5),
  max = round(max(DGEN_SCORE, na.rm = TRUE), digits = 5),
  .groups = "drop"
)
print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
print(desc_stats)
# Same cells, additionally split by the two order factors
# (temporal_DO, interval_DO); only n, mean, variance and sd are reported.
desc_stats_by_between <- summarise(
  group_by(long_data, temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL),
  n = n(),
  mean = round(mean(DGEN_SCORE, na.rm = TRUE), digits = 5),
  variance = round(var(DGEN_SCORE, na.rm = TRUE), digits = 5),
  sd = round(sd(DGEN_SCORE, na.rm = TRUE), digits = 5),
  .groups = "drop"
)
print("Descriptive statistics by between-subjects factors:")
print(desc_stats_by_between)
# Calculate mean differences for key comparisons
# cat() is used for the headings that start with "\n": print() would show the
# escape sequence literally instead of emitting a blank line.
cat("\n=== KEY MEAN DIFFERENCES ===\n")
# Past vs Future differences for each DOMAIN × INTERVAL combination.
# Scores are spread into one Past and one Future column per participant so the
# paired difference is computed row-wise. Participants missing either score
# (rows with NA were removed when long_data was built) get an NA difference
# and are dropped explicitly, rather than relying on summarise() returning a
# zero-length result for them.
# NOTE(review): assumes at most one score per pID within a design cell.
past_future_diffs <- long_data %>%
  pivot_wider(
    id_cols = c(pID, DOMAIN, INTERVAL),
    names_from = TIME,
    values_from = DGEN_SCORE
  ) %>%
  mutate(difference = Past - Future) %>%
  filter(!is.na(difference)) %>%
  group_by(DOMAIN, INTERVAL) %>%
  summarise(
    n = n(),
    mean_diff = round(mean(difference, na.rm = TRUE), 5),
    sd_diff = round(sd(difference, na.rm = TRUE), 5),
    # Standard error of the mean paired difference
    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
    .groups = "drop"
  )
print("Past vs Future differences by DOMAIN × INTERVAL:")
print(past_future_diffs)
# 5 vs 10 interval differences for each TIME × DOMAIN combination.
# Same approach; names_prefix yields the syntactic column names interval_5
# and interval_10 from the INTERVAL levels "5" and "10".
interval_diffs <- long_data %>%
  pivot_wider(
    id_cols = c(pID, TIME, DOMAIN),
    names_from = INTERVAL,
    values_from = DGEN_SCORE,
    names_prefix = "interval_"
  ) %>%
  mutate(difference = interval_5 - interval_10) %>%
  filter(!is.na(difference)) %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean_diff = round(mean(difference, na.rm = TRUE), 5),
    sd_diff = round(sd(difference, na.rm = TRUE), 5),
    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
    .groups = "drop"
  )
cat("\n5 vs 10 interval differences by TIME × DOMAIN:\n")
print(interval_diffs)