# Source listing metadata (code-viewer residue): R, 56 lines, 1.8 KiB
# Regression Analysis - Assumption Checking
# IVs: demo_sex, demo_age, demo_edu
# DVs: eohiDGEN_mean, ehi_global_mean
# Total: 6 regression models

# Session setup ----
# A very large `scipen` penalty biases printing towards fixed notation, so
# small p-values and large numbers are not shown in scientific notation.
options(scipen = 999)

# Load required libraries
# Attachment order is kept as-is: packages attached later take precedence
# over same-named functions from packages attached earlier.
library(car)
library(performance)
library(see)
library(ggplot2)
library(gridExtra)
library(dplyr)

# Load data ----
# Switch to the project directory and read the raw data set from it.
# NOTE(review): this absolute path is machine-specific, so setwd() will stop
# the script on any other computer. It is kept because later code may rely on
# the working directory; consider an RStudio project with relative paths.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
data <- read.csv("ehi1.csv")

# Check data structure
# dim() returns c(rows, columns); cat() prints both separated by a space.
cat("Data dimensions:", dim(data), "\n")
cat("Variables of interest:\n")
cat("IVs: demo_sex, demo_age, demo_edu\n")
cat("DVs: eohiDGEN_mean, ehi_global_mean\n\n")

# Check for missing values
# Produces a one-row data frame holding the NA count of each analysis
# variable. across(everything(), ...) replaces the superseded summarise_all().
cat("Missing values check:\n")
missing_summary <- data %>%
  select(demo_sex, demo_age, demo_edu, eohiDGEN_mean, ehi_global_mean) %>%
  summarise(across(everything(), ~ sum(is.na(.x))))
print(missing_summary)

# Remove rows with missing values
# Keep the participant ID plus the analysis variables, then drop every row
# that has an NA in any of the kept columns (pID included).
# NOTE(review): with read.csv() defaults, blank cells in text columns are read
# as "" rather than NA, so such rows are NOT removed here - confirm the CSV
# has no blank text cells.
data_clean <- data %>%
  select(
    pID, demo_sex, demo_age, demo_edu,
    eohiDGEN_mean, ehi_global_mean
  ) %>%
  filter(complete.cases(.))

cat("\nClean data dimensions:", dim(data_clean), "\n")

# Recode demo_sex as numeric for regression (0 = Female, 1 = Male)
# match() maps only the two expected labels. Any other value (different
# spelling, another category, a blank) becomes NA instead of being silently
# counted as Female, and a warning lists the offending values.
sex_levels <- c("Female", "Male")
data_clean$demo_sex_numeric <- match(data_clean$demo_sex, sex_levels) - 1

unmatched_sex <- setdiff(unique(data_clean$demo_sex), sex_levels)
if (length(unmatched_sex) > 0) {
  warning(
    "Unrecognised demo_sex value(s) recoded as NA: ",
    paste(unmatched_sex, collapse = ", "),
    call. = FALSE
  )
}

# Check demo_edu levels and recode if needed
# Frequency table of the raw education labels, for comparison against the
# labels used in the ordinal recoding.
cat("\nEducation levels:\n")
print(table(data_clean$demo_edu))

# Recode education as ordinal (assuming higher values = more education)
# The position of each label in edu_levels becomes its numeric code (1-4).
edu_levels <- c(
  "High School (or equivalent)",
  "College Diploma/Certificate",
  "University - Undergraduate",
  "University - Graduate"
)
data_clean$demo_edu_numeric <- match(data_clean$demo_edu, edu_levels)

# match() returns NA for any label not listed above. The complete-case filter
# has already run, so such rows would otherwise go unnoticed - warn about them.
unmatched_edu <- setdiff(unique(data_clean$demo_edu), edu_levels)
if (length(unmatched_edu) > 0) {
  warning(
    "Unrecognised demo_edu value(s) recoded as NA: ",
    paste(unmatched_edu, collapse = ", "),
    call. = FALSE
  )
}

# Verify recoding
# table() drops NA by default, which would hide rows whose recode failed
# (e.g. match() returns NA for an unlisted education label).
# useNA = "ifany" adds an NA column only when such rows exist.
cat("\nSex recoding (0=Female, 1=Male):\n")
print(table(data_clean$demo_sex_numeric, useNA = "ifany"))
cat("\nEducation recoding (1=HS, 2=College, 3=Undergrad, 4=Grad):\n")
print(table(data_clean$demo_edu_numeric, useNA = "ifany"))