eohi/.history/eohi1/regression e1 - ehi x sex x age_20251020175347.r
2025-12-23 15:47:09 -05:00

96 lines
2.6 KiB
R

# Session setup: printing options and package dependencies.
# A large scipen penalty biases R's printing toward fixed notation instead of
# scientific notation.
options(scipen = 999)
# dplyr supplies the %>% pipe, select() and filter() used to build `data`.
library(dplyr)
# car supplies vif(), which is called on the interaction models.
library(car)
# NOTE(review): nothing from lmtest, stargazer or sandwich is called in the
# visible part of this script -- confirm whether they are still needed.
library(lmtest)
library(stargazer)
library(sandwich)
# NOTE(review): lmtest is already attached above, so this second call is
# redundant.
library(lmtest)
# ---- Load the data and keep only the analysis variables ----
# NOTE(review): the working directory is a machine-specific absolute path, so
# the script only runs unchanged on a computer that has this folder.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep the two outcome columns and the two demographic columns, and drop
# respondents who answered "Prefer not to say" for sex. filter() also drops
# rows where demo_sex is NA, because the comparison evaluates to NA for them.
# The verbs are namespaced so that another attached package masking select()
# or filter() cannot change what runs here.
data <- df %>%
  dplyr::select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>%
  dplyr::filter(demo_sex != "Prefer not to say")

# Sanity checks: structure, missing values per column, and column classes.
# The results are printed explicitly because bare expressions are not
# auto-printed when the file is run through source().
str(data)
print(colSums(is.na(data)))
print(
  vapply(
    data,
    function(column) paste(class(column), collapse = "/"),
    character(1)
  )
)
# ---- Dummy variable for sex (0 = Male, 1 = Female) ----
# An explicit lookup is used so that only "Male" and "Female" receive a code.
# Any other value (blank strings, additional categories) becomes NA instead of
# being silently counted as Male; lm() then omits those rows under the default
# na.action.
sex_codes <- c(Male = 0, Female = 1)
data$sex_dummy <- unname(sex_codes[as.character(data$demo_sex)])

# Make uncoded rows visible instead of letting them pass unnoticed.
n_uncoded <- sum(!is.na(data$demo_sex) & is.na(data$sex_dummy))
if (n_uncoded > 0) {
  warning(
    n_uncoded,
    " row(s) have a demo_sex value other than \"Male\" or \"Female\"; ",
    "sex_dummy is NA for them",
    call. = FALSE
  )
}

# Verify the dummy coding (useNA adds an NA column/row only if any exist)
print(table(data$demo_sex, data$sex_dummy, useNA = "ifany"))

# ---- Descriptives ----
# Age: quartiles, mean and range, followed by the standard deviation
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))

# Center demo_age_1 on its sample mean so that a value of 0 corresponds to a
# respondent of average age
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)

# Verify the centering (the mean should be 0 up to rounding error)
print(summary(data$age_centered))

# Sex: counts and proportions of the original labels
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))

# Sex dummy: counts per code
print(table(data$sex_dummy, useNA = "ifany"))
#### REGRESSION MODELS ####
# MODEL 1: Age only - EOHI
# Simple regression of eohiDGEN_mean on mean-centered age.
age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data)

# Default lm diagnostic plots, arranged on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(age_DGEN)

# Shapiro-Wilk normality test on the residuals. Printed explicitly, like the
# summary and AIC below, so the result is not lost when the script is run
# through source(), where bare expressions are not auto-printed.
print(shapiro.test(residuals(age_DGEN)))
print(summary(age_DGEN))
print(AIC(age_DGEN))
# MODEL 1: Age only - EHI
# Simple regression of ehi_global_mean on mean-centered age.
age_domain <- lm(ehi_global_mean ~ age_centered, data = data)

# Default lm diagnostic plots, arranged on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(age_domain)

# Shapiro-Wilk normality test on the residuals. Printed explicitly, like the
# summary and AIC below, so the result is not lost when the script is run
# through source(), where bare expressions are not auto-printed.
print(shapiro.test(residuals(age_domain)))
print(summary(age_domain))
print(AIC(age_domain))
# MODEL 2: Sex only - EOHI
# Simple regression of eohiDGEN_mean on the sex dummy.
sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data)

# Default lm diagnostic plots, arranged on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(sex_DGEN)

# Shapiro-Wilk normality test on the residuals. Printed explicitly, like the
# summary and AIC below, so the result is not lost when the script is run
# through source(), where bare expressions are not auto-printed.
print(shapiro.test(residuals(sex_DGEN)))
print(summary(sex_DGEN))
print(AIC(sex_DGEN))
# MODEL 2: Sex only - EHI
# Simple regression of ehi_global_mean on the sex dummy.
sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data)

# Default lm diagnostic plots, arranged on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(sex_domain)

# Shapiro-Wilk normality test on the residuals. Printed explicitly, like the
# summary and AIC below, so the result is not lost when the script is run
# through source(), where bare expressions are not auto-printed.
print(shapiro.test(residuals(sex_domain)))
print(summary(sex_domain))
print(AIC(sex_domain))
# MODEL 3: Age + Sex + Interaction - EOHI
# eohiDGEN_mean regressed on centered age, the sex dummy and their product
# term. The formula is kept in its expanded form so the printed Call is
# unchanged.
interaction_DGEN <- lm(
  eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy,
  data = data
)

# Default lm diagnostic plots, arranged on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(interaction_DGEN)

# Residual normality (Shapiro-Wilk) and collinearity (car::vif) checks.
# Both are printed explicitly, like the summary and AIC below, so they are not
# lost when the script is run through source(), where bare expressions are not
# auto-printed.
print(shapiro.test(residuals(interaction_DGEN)))
print(vif(interaction_DGEN))
print(summary(interaction_DGEN))
print(AIC(interaction_DGEN))
# MODEL 3: Age + Sex + Interaction - EHI
# ehi_global_mean regressed on centered age, the sex dummy and their product
# term. The formula is kept in its expanded form so the printed Call is
# unchanged.
interaction_domain <- lm(
  ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy,
  data = data
)

# Default lm diagnostic plots, arranged on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(interaction_domain)

# Residual normality (Shapiro-Wilk) and collinearity (car::vif) checks.
# Both are printed explicitly, like the summary and AIC below, so they are not
# lost when the script is run through source(), where bare expressions are not
# auto-printed.
print(shapiro.test(residuals(interaction_domain)))
print(vif(interaction_domain))
print(summary(interaction_domain))
print(AIC(interaction_domain))