Reviewed-on: #6 Co-authored-by: Irina Levit <irina.levit.rn@gmail.com> Co-committed-by: Irina Levit <irina.levit.rn@gmail.com>
434 lines
13 KiB
Plaintext
434 lines
13 KiB
Plaintext
---
|
|
title: "Mixed ANOVA - Domain General Vars"
|
|
author: ""
|
|
date: "`r Sys.Date()`"
|
|
output:
|
|
html_document:
|
|
toc: true
|
|
toc_float: true
|
|
code_folding: hide
|
|
---
|
|
|
|
```{r setup, include = FALSE}
|
|
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = TRUE)
|
|
```
|
|
|
|
# Setup
|
|
|
|
```{r libraries}
|
|
library(tidyverse)
|
|
library(rstatix)
|
|
library(emmeans)
|
|
library(effectsize)
|
|
library(afex)
|
|
library(car)
|
|
|
|
options(scipen = 999)
|
|
afex::set_sum_contrasts()
|
|
```
|
|
|
|
# Data
|
|
|
|
```{r read-data}
|
|
# Data file is in parent of knit/ (eohi3/eohi3.csv)
|
|
df <- read.csv(
|
|
"/home/ladmin/Documents/DND/EOHI/eohi3/eohi3.csv",
|
|
stringsAsFactors = FALSE,
|
|
check.names = FALSE,
|
|
na.strings = "NA"
|
|
)
|
|
|
|
between_vars <- c("perspective", "temporalDO")
|
|
within_vars <- c(
|
|
"past_pref_DGEN", "past_pers_DGEN", "past_val_DGEN",
|
|
"fut_pref_DGEN", "fut_pers_DGEN", "fut_val_DGEN"
|
|
)
|
|
|
|
missing_vars <- setdiff(c(between_vars, within_vars, "pID"), names(df))
|
|
if (length(missing_vars) > 0) {
|
|
stop(paste("Missing required variables:", paste(missing_vars, collapse = ", ")))
|
|
}
|
|
|
|
anova_data <- df %>%
|
|
select(pID, all_of(between_vars), all_of(within_vars)) %>%
|
|
filter(
|
|
!is.na(perspective), perspective != "",
|
|
!is.na(temporalDO), temporalDO != ""
|
|
)
|
|
```
|
|
|
|
# Long format
|
|
|
|
```{r long-format}
|
|
long_data <- anova_data %>%
|
|
pivot_longer(
|
|
cols = all_of(within_vars),
|
|
names_to = "variable",
|
|
values_to = "DGEN_SCORE"
|
|
) %>%
|
|
mutate(
|
|
time = case_when(
|
|
grepl("^past_", variable) ~ "past",
|
|
grepl("^fut_", variable) ~ "fut",
|
|
TRUE ~ NA_character_
|
|
),
|
|
domain = case_when(
|
|
grepl("_pref_DGEN$", variable) ~ "pref",
|
|
grepl("_pers_DGEN$", variable) ~ "pers",
|
|
grepl("_val_DGEN$", variable) ~ "val",
|
|
TRUE ~ NA_character_
|
|
)
|
|
) %>%
|
|
mutate(
|
|
TIME = factor(time, levels = c("past", "fut")),
|
|
DOMAIN = factor(domain, levels = c("pref", "pers", "val")),
|
|
perspective = factor(perspective),
|
|
temporalDO = factor(temporalDO),
|
|
pID = factor(pID)
|
|
) %>%
|
|
select(pID, perspective, temporalDO, TIME, DOMAIN, DGEN_SCORE) %>%
|
|
filter(!is.na(DGEN_SCORE))
|
|
```
|
|
|
|
# Descriptive statistics
|
|
|
|
```{r desc-stats}
|
|
desc_stats <- long_data %>%
|
|
group_by(perspective, temporalDO, TIME, DOMAIN) %>%
|
|
summarise(
|
|
n = n(),
|
|
mean = round(mean(DGEN_SCORE), 5),
|
|
variance = round(var(DGEN_SCORE), 5),
|
|
sd = round(sd(DGEN_SCORE), 5),
|
|
median = round(median(DGEN_SCORE), 5),
|
|
q1 = round(quantile(DGEN_SCORE, 0.25), 5),
|
|
q3 = round(quantile(DGEN_SCORE, 0.75), 5),
|
|
min = round(min(DGEN_SCORE), 5),
|
|
max = round(max(DGEN_SCORE), 5),
|
|
.groups = "drop"
|
|
)
|
|
|
|
# Show all rows and columns (no truncation)
|
|
options(tibble.width = Inf)
|
|
print(desc_stats, n = Inf)
|
|
```
|
|
|
|
Interpretation:
|
|
1. Mean and median values are similar w/ slightly more variation than in the domain specific anova.
|
|
2. Highest to lowest group n size ratio is 1.14 (139/122). Acceptable ratio for ANOVA (under 1.5).
|
|
3. Highest to lowest overall group variance ratio is 1.40 (9.32/6.65). Acceptable ratio for ANOVA (under 4).
|
|
For the sake of consistency w/ the other EHI studies, I also calculated Hartley's F-max ratio.
|
|
The conservative F-max critical value is 1.60 (same as DS anova since number of groups and n sizes doesn't change), which is still higher than the highest observed F-max ratio of 1.28.
|
|
|
|
# Assumption checks
|
|
|
|
## Missing values
|
|
|
|
```{r missing}
|
|
missing_summary <- long_data %>%
|
|
group_by(perspective, temporalDO, TIME, DOMAIN) %>%
|
|
summarise(
|
|
n_total = n(),
|
|
n_missing = sum(is.na(DGEN_SCORE)),
|
|
pct_missing = round(100 * n_missing / n_total, 2),
|
|
.groups = "drop"
|
|
)
|
|
|
|
print(missing_summary, n = Inf)
|
|
```
|
|
|
|
No missing values. As expected.
|
|
|
|
## Outliers
|
|
|
|
```{r outliers}
|
|
outlier_summary <- long_data %>%
|
|
group_by(perspective, temporalDO, TIME, DOMAIN) %>%
|
|
summarise(
|
|
n = n(),
|
|
n_outliers = sum(abs(scale(DGEN_SCORE)) > 3),
|
|
.groups = "drop"
|
|
)
|
|
|
|
print(outlier_summary, n = Inf)
|
|
```
|
|
|
|
No outliers present. Good. Same as DS anova.
|
|
|
|
## Homogeneity of variance
|
|
|
|
```{r homogeneity}
|
|
homogeneity_between <- long_data %>%
|
|
group_by(TIME, DOMAIN) %>%
|
|
rstatix::levene_test(DGEN_SCORE ~ perspective * temporalDO)
|
|
|
|
print(homogeneity_between, n = Inf)
|
|
```
|
|
|
|
Levene's test is signiicant for 1 cell only: past-val. However, variance ratios and F-max tests show that any heteroscedasticity is mild.
|
|
|
|
## Normality (within-subjects residuals)
|
|
|
|
```{r normality}
|
|
resid_within <- long_data %>%
|
|
group_by(pID) %>%
|
|
mutate(person_mean = mean(DGEN_SCORE, na.rm = TRUE)) %>%
|
|
ungroup() %>%
|
|
mutate(resid = DGEN_SCORE - person_mean) %>%
|
|
pull(resid)
|
|
resid_within <- resid_within[!is.na(resid_within)]
|
|
|
|
n_resid <- length(resid_within)
|
|
if (n_resid < 3L) {
|
|
message("Too few within-subjects residuals (n < 3); skipping Shapiro-Wilk.")
|
|
} else {
|
|
resid_for_shapiro <- if (n_resid > 5000L) {
|
|
set.seed(1L)
|
|
sample(resid_within, 5000L)
|
|
} else {
|
|
resid_within
|
|
}
|
|
print(shapiro.test(resid_for_shapiro))
|
|
}
|
|
```
|
|
|
|
### Q-Q plot
|
|
|
|
```{r qqplot, fig.height = 4}
|
|
qqnorm(resid_within)
|
|
qqline(resid_within)
|
|
```
|
|
|
|
Shapiro-Wilk test is significant but is sensitive to large sample size.
|
|
QQ plot shows that strict centre residuals are normally distributed, but there is some deviation from normality.
|
|
ANOVA is robust to violations of normality w/ large sample size.
|
|
|
|
Overall, ANOVA can proceed.
|
|
|
|
# Mixed ANOVA
|
|
|
|
```{r anova}
|
|
aov_afex <- aov_ez(
|
|
id = "pID",
|
|
dv = "DGEN_SCORE",
|
|
data = long_data,
|
|
between = c("perspective", "temporalDO"),
|
|
within = c("TIME", "DOMAIN"),
|
|
type = 3,
|
|
anova_table = list(correction = "none")
|
|
)
|
|
|
|
print(aov_afex)
|
|
```
|
|
|
|
Mauchly's test of sphericity is not significant. Using uncorrected values for interpretation and analysis.
|
|
|
|
|
|
Significant main effects and interactions:
|
|
Effect df MSE F ges p
|
|
4 TIME 1, 518 3.11 8.39 ** .001 .004
|
|
8 DOMAIN 2, 1036 2.13 7.85 *** .001 <.001
|
|
10 temporalDO:DOMAIN 2, 1036 2.13 5.00 ** <.001 .007
|
|
15 perspective:temporalDO:TIME:DOMAIN 2, 1036 1.52 3.12 * <.001 .045
|
|
|
|
|
|
# Mauchly and epsilon
|
|
|
|
```{r mauchly}
|
|
anova_wide <- anova_data %>%
|
|
select(pID, perspective, temporalDO, all_of(within_vars)) %>%
|
|
filter(if_all(all_of(within_vars), ~ !is.na(.)))
|
|
response_matrix <- as.matrix(anova_wide[, within_vars])
|
|
rm_model <- lm(response_matrix ~ perspective * temporalDO, data = anova_wide)
|
|
idata <- data.frame(
|
|
TIME = factor(rep(c("past", "fut"), each = 3), levels = c("past", "fut")),
|
|
DOMAIN = factor(rep(c("pref", "pers", "val"), 2), levels = c("pref", "pers", "val"))
|
|
)
|
|
rm_anova <- car::Anova(rm_model, idata = idata, idesign = ~ TIME * DOMAIN, type = 3)
|
|
rm_summary <- summary(rm_anova, multivariate = FALSE)
|
|
if (!is.null(rm_summary$sphericity.tests)) {
|
|
print(rm_summary$sphericity.tests)
|
|
}
|
|
if (!is.null(rm_summary$epsilon)) {
|
|
print(rm_summary$epsilon)
|
|
}
|
|
```
|
|
|
|
# Post hoc comparisons
|
|
|
|
## TIME (main effect)
|
|
|
|
```{r posthoc-TIME}
|
|
emm_TIME <- emmeans(aov_afex, ~ TIME)
|
|
print(pairs(emm_TIME, adjust = "none"))
|
|
```
|
|
|
|
Supports presence of EHI effect.
|
|
|
|
## DOMAIN (main effect)
|
|
|
|
```{r posthoc-domain}
|
|
emm_DOMAIN <- emmeans(aov_afex, ~ DOMAIN)
|
|
print(pairs(emm_DOMAIN, adjust = "tukey"))
|
|
```
|
|
|
|
Only preference to values contrast is significant.
|
|
|
|
## temporalDO:DOMAIN
|
|
|
|
```{r posthoc-temporaldo-domain}
|
|
emmeans(aov_afex, pairwise ~ temporalDO | DOMAIN, adjust = "none")$contrasts
|
|
emmeans(aov_afex, pairwise ~ DOMAIN | temporalDO, adjust = "tukey")$contrasts
|
|
```
|
|
|
|
When grouped by domain, no contrasts are significant.
|
|
|
|
|
|
When grouped by temporalDO, some contrasts are significant:
|
|
|
|
Future-first temporal display order:
|
|
contrast estimate SE df t.ratio p.value
|
|
pref - pers 0.25065 0.0892 518 2.810 0.0142
|
|
|
|
|
|
Past-first temporal display order:
|
|
contrast estimate SE df t.ratio p.value
|
|
pref - val 0.33129 0.0895 518 3.702 0.0007
|
|
pers - val 0.32478 0.0921 518 3.527 0.0013
|
|
|
|
## perspective:temporalDO:TIME:DOMAIN
|
|
|
|
### contrasts for TIME grouped by perspective, temporalDO, and DOMAIN
|
|
```{r posthoc-fourway}
|
|
emm_fourway <- emmeans(aov_afex, pairwise ~ TIME | perspective * temporalDO * DOMAIN, adjust = "tukey")
|
|
print(emm_fourway$contrasts)
|
|
```
|
|
|
|
Significant contrasts:
|
|
|
|
contrast estimate SE df t.ratio p.value
|
|
past - fut 0.5285 0.179 518 2.957 0.0032 (self-perspective, personality domain, past-first temporal display order)
|
|
past - fut 0.5366 0.187 518 2.863 0.0044 (self-perspective, values domain, past-first temporal display order)
|
|
|
|
### contrasts for DOMAIN grouped by perspective, TIME, and temporalDO
|
|
```{r posthoc-fourway2}
|
|
emm_fourway2 <- emmeans(aov_afex, pairwise ~ DOMAIN | perspective * TIME * temporalDO, adjust = "tukey")
|
|
print(emm_fourway2$contrasts)
|
|
```
|
|
|
|
Significant contrasts:
|
|
|
|
contrast estimate SE df t.ratio p.value
|
|
pref - val 0.6259 0.166 518 3.778 0.0005 (other-perspective, past-directed questions, past-first temporal display order)
|
|
pers - val 0.4892 0.160 518 3.056 0.0066 (other-perspective, past-directed questions, past-first temporal display order)
|
|
pref - val 0.4309 0.168 518 2.559 0.0290 (self-perspective, future-directed questions, past-first temporal display order)
|
|
|
|
## Cohen's d (significant contrasts only)
|
|
|
|
```{r cohens-d-significant}
|
|
d_data <- anova_data %>%
|
|
mutate(
|
|
past_mean = (past_pref_DGEN + past_pers_DGEN + past_val_DGEN) / 3,
|
|
fut_mean = (fut_pref_DGEN + fut_pers_DGEN + fut_val_DGEN) / 3,
|
|
pref_mean = (past_pref_DGEN + fut_pref_DGEN) / 2,
|
|
pers_mean = (past_pers_DGEN + fut_pers_DGEN) / 2,
|
|
val_mean = (past_val_DGEN + fut_val_DGEN) / 2
|
|
)
|
|
|
|
cohens_d_results <- tibble(
|
|
contrast = character(),
|
|
condition = character(),
|
|
d = double()
|
|
)
|
|
|
|
# TIME main: past vs fut
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "TIME (past - fut)",
|
|
condition = "overall",
|
|
d = suppressMessages(effectsize::cohens_d(d_data$past_mean, d_data$fut_mean, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
# DOMAIN main: pref vs val
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "DOMAIN (pref - val)",
|
|
condition = "overall",
|
|
d = suppressMessages(effectsize::cohens_d(d_data$pref_mean, d_data$val_mean, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
# temporalDO:DOMAIN - future: pref vs pers
|
|
d_fut <- d_data %>% filter(temporalDO == "future")
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "DOMAIN (pref - pers)",
|
|
condition = "temporalDO = future",
|
|
d = suppressMessages(effectsize::cohens_d(d_fut$pref_mean, d_fut$pers_mean, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
# temporalDO:DOMAIN - past: pref vs val, pers vs val
|
|
d_past <- d_data %>% filter(temporalDO == "past")
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "DOMAIN (pref - val)",
|
|
condition = "temporalDO = past",
|
|
d = suppressMessages(effectsize::cohens_d(d_past$pref_mean, d_past$val_mean, paired = TRUE)$Cohens_d)
|
|
) %>%
|
|
add_row(
|
|
contrast = "DOMAIN (pers - val)",
|
|
condition = "temporalDO = past",
|
|
d = suppressMessages(effectsize::cohens_d(d_past$pers_mean, d_past$val_mean, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
# 4-way TIME: self, past temporalDO, pers
|
|
d_self_past <- d_data %>% filter(perspective == "self", temporalDO == "past")
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "TIME (past - fut)",
|
|
condition = "self, past temporalDO, pers domain",
|
|
d = suppressMessages(effectsize::cohens_d(d_self_past$past_pers_DGEN, d_self_past$fut_pers_DGEN, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
# 4-way TIME: self, past temporalDO, val
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "TIME (past - fut)",
|
|
condition = "self, past temporalDO, val domain",
|
|
d = suppressMessages(effectsize::cohens_d(d_self_past$past_val_DGEN, d_self_past$fut_val_DGEN, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
# 4-way DOMAIN: other, past TIME, past temporalDO - pref vs val
|
|
d_other_past_tpast <- d_data %>% filter(perspective == "other", temporalDO == "past")
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "DOMAIN (pref - val)",
|
|
condition = "other, past TIME, past temporalDO",
|
|
d = suppressMessages(effectsize::cohens_d(d_other_past_tpast$past_pref_DGEN, d_other_past_tpast$past_val_DGEN, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
# 4-way DOMAIN: other, past TIME, past temporalDO - pers vs val
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "DOMAIN (pers - val)",
|
|
condition = "other, past TIME, past temporalDO",
|
|
d = suppressMessages(effectsize::cohens_d(d_other_past_tpast$past_pers_DGEN, d_other_past_tpast$past_val_DGEN, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
# 4-way DOMAIN: self, fut TIME, past temporalDO - pref vs val
|
|
d_self_fut_tpast <- d_data %>% filter(perspective == "self", temporalDO == "past")
|
|
cohens_d_results <- cohens_d_results %>%
|
|
add_row(
|
|
contrast = "DOMAIN (pref - val)",
|
|
condition = "self, fut TIME, past temporalDO",
|
|
d = suppressMessages(effectsize::cohens_d(d_self_fut_tpast$fut_pref_DGEN, d_self_fut_tpast$fut_val_DGEN, paired = TRUE)$Cohens_d)
|
|
)
|
|
|
|
cohens_d_results %>%
|
|
mutate(d = round(d, 3)) %>%
|
|
print(n = Inf)
|
|
```
|
|
|
|
Size d Interpretation
|
|
Small 0.2 Weak effect
|
|
Medium 0.5 Moderate effect
|
|
Large 0.8 Strong effect |