169 lines
6.5 KiB
R
169 lines
6.5 KiB
R
# Mixed ANOVA Analysis for DGEN Variables
# EOHI Experiment 2 Data Analysis - DGEN Level Analysis with TIME, DOMAIN, and INTERVAL factors
# Variables: DGEN_past_5_Pref, DGEN_past_5_Pers, DGEN_past_5_Val, DGEN_past_10_Pref, DGEN_past_10_Pers, DGEN_past_10_Val,
# DGEN_fut_5_Pref, DGEN_fut_5_Pers, DGEN_fut_5_Val, DGEN_fut_10_Pref, DGEN_fut_10_Pers, DGEN_fut_10_Val
|
||
|
||
# Load required libraries
|
||
library(tidyverse)
|
||
library(ez)
|
||
library(car)
|
||
library(afex) # For aov_ez (cleaner ANOVA output)
|
||
library(nortest) # For normality tests
|
||
library(emmeans) # For post-hoc comparisons
|
||
library(purrr) # For map functions
|
||
library(effsize) # For Cohen's d calculations
|
||
library(effectsize) # For effect size calculations
|
||
|
||
# Session configuration ----
# scipen = 999 makes R prefer fixed notation over scientific notation when
# printing numbers. The contrasts option selects sum-to-zero coding for
# unordered factors and polynomial coding for ordered factors; it is set here
# ahead of the mixed ANOVA.
options(
  scipen = 999,
  contrasts = c("contr.sum", "contr.poly")
)

# Work from the project folder so relative file paths used later resolve.
# This is a machine-specific absolute path.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||
|
||
# Load the raw data ----
# Read from the current working directory.
# NOTE(review): presumably one row per participant - confirm against the file.
data <- read.csv("eohi2.csv")

# Report the size of the raw dataset as "rows x cols". The separator is " x "
# with a space on both sides; " x" alone glued the column count onto the "x".
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$pID))))
|
||
|
||
# Columns required for the DGEN analysis ----
# Twelve DGEN columns, listed past before future, interval 5 before 10, and
# within each interval in the order Pref, Pers, Val.
required_vars <- c(
  "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
  "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
  "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
  "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"
)

# Fail fast when a column is absent: the reshaping step selects these columns
# with all_of(), which errors on missing names anyway, so stopping here gives
# a direct message naming exactly what is missing.
missing_vars <- setdiff(required_vars, colnames(data))
if (length(missing_vars) > 0) {
  stop(
    "Missing variables: ", paste(missing_vars, collapse = ", "),
    call. = FALSE
  )
}
print("All required DGEN variables found!")
|
||
|
||
# Lookup table: DGEN column name -> within-subjects factor labels ----
# The label vectors rely on the order of required_vars: six past columns then
# six future columns; within each half, three interval-5 columns then three
# interval-10 columns; within each triple, Pref / Pers / Val.
variable_mapping <- data.frame(
  variable = required_vars,
  TIME = rep(c("Past", "Future"), each = 6),
  DOMAIN = rep(c("Preferences", "Personality", "Values"), times = 4),
  INTERVAL = rep(rep(c("5", "10"), each = 3), times = 2),
  stringsAsFactors = FALSE
)

# Show the mapping so the column-to-factor assignment can be checked by eye.
print("Variable mapping:")
print(variable_mapping)
|
||
|
||
# Reshape to long format ----
# Result: one row per participant x DGEN column, with the score in DGEN_SCORE
# and the TIME / DOMAIN / INTERVAL labels attached by joining each column name
# against variable_mapping. Rows with a missing score are dropped.
long_data <- data %>%
  select(
    pID, ResponseId, temporal_DO, interval_DO,
    all_of(required_vars)
  ) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "DGEN_SCORE"
  ) %>%
  left_join(variable_mapping, by = "variable") %>%
  mutate(
    # Explicit level order for the within-subjects factors.
    TIME = factor(TIME, levels = c("Past", "Future")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values")),
    INTERVAL = factor(INTERVAL, levels = c("5", "10")),
    # Participant id and the two order columns become factors with default
    # level ordering.
    across(c(pID, temporal_DO, interval_DO), as.factor)
  ) %>%
  # Fix the final column order and drop the helper column "variable".
  select(
    pID, ResponseId, temporal_DO, interval_DO,
    TIME, DOMAIN, INTERVAL, DGEN_SCORE
  ) %>%
  filter(!is.na(DGEN_SCORE))
|
||
|
||
# Summarise the long-format data ----
# Dimensions are printed as "rows x cols"; the separator is " x " with a space
# on both sides (" x" alone glued the column count onto the "x").
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
print(paste("Number of participants:", length(unique(long_data$pID))))

# Print the levels of every factor as "<name>: level1, level2, ...".
print("Factor levels:")
for (factor_name in c(
  "TIME", "DOMAIN", "INTERVAL", "temporal_DO", "interval_DO"
)) {
  level_text <- paste(levels(long_data[[factor_name]]), collapse = ", ")
  print(paste0(factor_name, ": ", level_text))
}
|
||
|
||
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================

# Per-cell summary of DGEN_SCORE for every TIME x DOMAIN x INTERVAL cell:
# count, mean, variance, sd, median, quartiles, and range. The statistics are
# computed first and then rounded to 5 decimals in a single step; n is left
# unrounded.
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN, INTERVAL) %>%
  summarise(
    n = n(),
    mean = mean(DGEN_SCORE, na.rm = TRUE),
    variance = var(DGEN_SCORE, na.rm = TRUE),
    sd = sd(DGEN_SCORE, na.rm = TRUE),
    median = median(DGEN_SCORE, na.rm = TRUE),
    q1 = quantile(DGEN_SCORE, 0.25, na.rm = TRUE),
    q3 = quantile(DGEN_SCORE, 0.75, na.rm = TRUE),
    min = min(DGEN_SCORE, na.rm = TRUE),
    max = max(DGEN_SCORE, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(across(-c(TIME, DOMAIN, INTERVAL, n), ~ round(.x, 5)))

print("Descriptive statistics by TIME, DOMAIN, and INTERVAL:")
print(desc_stats)
|
||
|
||
# Same cell summary, additionally split by the two order columns (temporal_DO
# and interval_DO), which the script treats as between-subjects factors.
# Count, mean, variance, and sd per cell; statistics rounded to 5 decimals in
# one step after aggregation, n left unrounded.
desc_stats_by_between <- long_data %>%
  group_by(temporal_DO, interval_DO, TIME, DOMAIN, INTERVAL) %>%
  summarise(
    n = n(),
    mean = mean(DGEN_SCORE, na.rm = TRUE),
    variance = var(DGEN_SCORE, na.rm = TRUE),
    sd = sd(DGEN_SCORE, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(across(c(mean, variance, sd), ~ round(.x, 5)))

print("Descriptive statistics by between-subjects factors:")
print(desc_stats_by_between)
|
||
|
||
# Calculate mean differences for key comparisons
# print() displays escape sequences literally (the banner showed up as
# "\n=== ..."), so the intended blank line is written with cat() and the
# banner itself keeps the print() format used by the rest of the script.
cat("\n")
print("=== KEY MEAN DIFFERENCES ===")
|
||
|
||
# Past vs Future differences for each DOMAIN x INTERVAL combination ----
# Step 1: one row per participant within each DOMAIN x INTERVAL cell, holding
#   that participant's Past score, Future score, and Past - Future difference.
#   match() returns the position of the first matching row, or NA when there
#   is none, so each lookup yields exactly one value per group. Logical
#   subsetting (DGEN_SCORE[TIME == "Past"]) could yield zero or several
#   values, which summarise() is deprecated for and which dropped incomplete
#   participants only as a side effect.
#   NOTE(review): if a participant ever has more than one row per cell, the
#   first one is used - confirm pID is unique per cell.
# Step 2: participants lacking either score are dropped explicitly.
# Step 3: per-cell count, mean, sd, and standard error of the differences,
#   rounded to 5 decimals.
past_future_diffs <- long_data %>%
  group_by(DOMAIN, INTERVAL, pID) %>%
  summarise(
    past_score = DGEN_SCORE[match("Past", TIME)],
    future_score = DGEN_SCORE[match("Future", TIME)],
    difference = past_score - future_score,
    .groups = "drop"
  ) %>%
  filter(!is.na(difference)) %>%
  group_by(DOMAIN, INTERVAL) %>%
  summarise(
    n = n(),
    mean_diff = round(mean(difference, na.rm = TRUE), 5),
    sd_diff = round(sd(difference, na.rm = TRUE), 5),
    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
    .groups = "drop"
  )

print("Past vs Future differences by DOMAIN × INTERVAL:")
print(past_future_diffs)
|
||
|
||
# 5 vs 10 interval differences for each TIME x DOMAIN combination ----
# Step 1: one row per participant within each TIME x DOMAIN cell, holding the
#   interval-5 score, the interval-10 score, and their difference (5 - 10).
#   match() returns the position of the first matching row, or NA when there
#   is none, so each lookup yields exactly one value per group. Logical
#   subsetting (DGEN_SCORE[INTERVAL == "5"]) could yield zero or several
#   values, which summarise() is deprecated for and which dropped incomplete
#   participants only as a side effect.
#   NOTE(review): if a participant ever has more than one row per cell, the
#   first one is used - confirm pID is unique per cell.
# Step 2: participants lacking either score are dropped explicitly.
# Step 3: per-cell count, mean, sd, and standard error of the differences,
#   rounded to 5 decimals.
interval_diffs <- long_data %>%
  group_by(TIME, DOMAIN, pID) %>%
  summarise(
    interval_5_score = DGEN_SCORE[match("5", INTERVAL)],
    interval_10_score = DGEN_SCORE[match("10", INTERVAL)],
    difference = interval_5_score - interval_10_score,
    .groups = "drop"
  ) %>%
  filter(!is.na(difference)) %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean_diff = round(mean(difference, na.rm = TRUE), 5),
    sd_diff = round(sd(difference, na.rm = TRUE), 5),
    se_diff = round(sd(difference, na.rm = TRUE) / sqrt(n()), 5),
    .groups = "drop"
  )

# print() displays "\n" literally, so the blank line is written with cat().
cat("\n")
print("5 vs 10 interval differences by TIME × DOMAIN:")
print(interval_diffs)