# Regression Analysis - Assumption Checking ----
# IVs: demo_sex, demo_age, demo_edu
# DVs: eohiDGEN_mean, ehi_global_mean
# Total: 6 regression models
#
# This section loads the data, reports missing values, keeps complete cases
# for the variables of interest, and derives numeric codes for sex and
# education (demo_sex_numeric, demo_edu_numeric) in `data_clean`.

# Print numbers in fixed notation rather than scientific notation.
options(scipen = 999)

# Load required libraries
library(car)
library(performance)
library(see)
library(ggplot2)
library(gridExtra)
library(dplyr)

# NOTE(review): hard-coded, machine-specific working directory. Kept as-is
# because code outside this section may rely on relative paths; consider
# running the script from the project root instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

data <- read.csv("ehi1.csv")

# Check data structure
cat("Data dimensions:", dim(data), "\n")
cat("Variables of interest:\n")
cat("IVs: demo_sex, demo_age, demo_edu\n")
cat("DVs: eohiDGEN_mean, ehi_global_mean\n\n")

# Check for missing values: one row holding the NA count of each variable.
cat("Missing values check:\n")
missing_summary <- data %>%
  select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
  summarise(across(everything(), ~ sum(is.na(.x))))
print(missing_summary)

# Remove rows with missing values (pID is carried along as the identifier).
data_clean <- data %>%
  select(
    pID, demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean
  ) %>%
  filter(complete.cases(.))

cat("\nClean data dimensions:", dim(data_clean), "\n")

# Recode demo_sex as numeric for regression (0 = Female, 1 = Male).
# Every value other than "Male" becomes 0, so warn when the column holds
# anything besides the two expected labels instead of coding it silently.
unexpected_sex <- setdiff(unique(data_clean$demo_sex), c("Female", "Male"))
if (length(unexpected_sex) > 0) {
  warning(
    "demo_sex values coded as 0 that are not \"Female\": ",
    paste(unexpected_sex, collapse = ", "),
    call. = FALSE
  )
}
data_clean$demo_sex_numeric <- ifelse(data_clean$demo_sex == "Male", 1, 0)

# Check demo_edu levels and recode if needed
cat("\nEducation levels:\n")
print(table(data_clean$demo_edu))

# Recode education as ordinal (assuming higher values = more education).
# The position of each label in edu_levels is its numeric code.
edu_levels <- c(
  "High School (or equivalent)",
  "College Diploma/Certificate",
  "University - Undergraduate",
  "University - Graduate"
)
data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)

# match() yields NA for any label missing from edu_levels. These NAs appear
# after the complete-case filter above, so report them explicitly.
unmatched_edu <- unique(
  data_clean$demo_edu[is.na(data_clean$demo_edu_numeric)]
)
if (length(unmatched_edu) > 0) {
  warning(
    "demo_edu values not in edu_levels (coded NA): ",
    paste(unmatched_edu, collapse = ", "),
    call. = FALSE
  )
}

# Verify recoding
cat("\nSex recoding (0=Female, 1=Male):\n")
print(table(data_clean$demo_sex_numeric))

cat("\nEducation recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):\n")
print(table(data_clean$demo_edu_numeric))