eohi/.history/eohi1/regressions e1 - assumptions_20251016142437.r
2025-12-23 15:47:09 -05:00

56 lines
1.8 KiB
R

# Regression Analysis - Assumption Checking
# IVs: demo_sex, demo_age, demo_edu
# DVs: eohiDGEN_mean, ehi_global_mean
# Total: 6 regression models
# Strongly favour fixed over scientific notation in printed numbers
options(scipen = 999)

# Packages used by this script
library(car)
library(performance)
library(see)
library(ggplot2)
gridExtra_loaded <- NULL
library(gridExtra)
library(dplyr)

# Work from the project folder and read the study data set
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
data <- read.csv("ehi1.csv")

# Report the size of the raw data and the variables analysed below
cat("Data dimensions:", nrow(data), ncol(data), "\n")
cat(
  "Variables of interest:\n",
  "IVs: demo_sex, demo_age, demo_edu\n",
  "DVs: eohiDGEN_mean, ehi_global_mean\n\n",
  sep = ""
)
# Check for missing values
# Columns entering the regressions; pID is carried along as the row identifier.
analysis_vars <- c(
  "demo_sex", "demo_age", "demo_edu", "eohiDGEN_mean", "ehi_global_mean"
)

# read.csv() only turns blank fields into NA for numeric/logical columns, so
# blank strings in character columns would otherwise be counted as valid
# answers and survive the completeness filter. Convert them to NA first.
blank_to_na <- function(x) {
  replace(x, !is.na(x) & !nzchar(trimws(x)), NA)
}

data_subset <- data %>%
  select(pID, all_of(analysis_vars)) %>%
  mutate(across(where(is.character), blank_to_na))

# Number of missing values per analysis variable
cat("Missing values check:\n")
missing_summary <- data_subset %>%
  select(all_of(analysis_vars)) %>%
  summarise(across(everything(), ~ sum(is.na(.x))))
print(missing_summary)

# Remove rows with missing values (listwise deletion, pID included)
data_clean <- data_subset %>%
  filter(complete.cases(.))
cat("\nClean data dimensions:", dim(data_clean), "\n")
# Recode demo_sex as numeric for regression (0 = Female, 1 = Male).
# match() leaves any response other than these two labels as NA; a bare
# ifelse(demo_sex == "Male", 1, 0) would silently count them all as Female.
sex_levels <- c("Female", "Male")
data_clean$demo_sex_numeric <- match(data_clean$demo_sex, sex_levels) - 1
unmatched_sex <- setdiff(unique(data_clean$demo_sex), sex_levels)
if (length(unmatched_sex) > 0) {
  warning(
    "demo_sex values coded as NA (not Female/Male): ",
    paste(unmatched_sex, collapse = ", "),
    call. = FALSE
  )
}

# Check demo_edu levels and recode if needed
cat("\nEducation levels:\n")
print(table(data_clean$demo_edu))

# Recode education as ordinal (assuming higher values = more education).
# The position of each label in edu_levels becomes its numeric code (1-4).
edu_levels <- c(
  "High School (or equivalent)", "College Diploma/Certificate",
  "University - Undergraduate", "University - Graduate"
)
data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)
unmatched_edu <- setdiff(unique(data_clean$demo_edu), edu_levels)
if (length(unmatched_edu) > 0) {
  warning(
    "demo_edu values coded as NA (not in edu_levels): ",
    paste(unmatched_edu, collapse = ", "),
    call. = FALSE
  )
}

# Verify recoding; useNA = "ifany" makes unrecoded rows visible in the counts
cat("\nSex recoding (0=Female, 1=Male):\n")
print(table(data_clean$demo_sex_numeric, useNA = "ifany"))
cat("\nEducation recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):\n")
print(table(data_clean$demo_edu_numeric, useNA = "ifany"))