# Recode the education variable `demo_edu` from ehi1.csv into a three-level
# ordered factor (`edu3`) and a numeric companion (`edu_num`, coded 1-3)
# for use in correlations. Frequency tables are printed after each step so
# the recoding can be checked by eye.

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

# NOTE: machine-specific absolute path; adjust it when running elsewhere.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

data <- read.csv("ehi1.csv")

# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))

# Also show the unique values and their frequencies
# (cat() is used so that "\n" is emitted as a real line break; print()
# would display the escape sequence literally.)
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))

# Recode demo_edu into 3 ordinal levels ----
edu3_levels <- c("HS_TS", "C_Ug", "grad_prof")

# Use a typed NA so the column is character from the start.
data$edu3 <- NA_character_

# HS_TS: High School and Trade School
data$edu3[data$demo_edu %in% c(
  "High School (or equivalent)",
  "Trade School (non-military)"
)] <- "HS_TS"

# C_Ug: College and University - Undergraduate
data$edu3[data$demo_edu %in% c(
  "College Diploma/Certificate",
  "University - Undergraduate"
)] <- "C_Ug"

# grad_prof: University - Graduate, University - PhD, and Professional Degree
data$edu3[data$demo_edu %in% c(
  "University - Graduate (Masters)",
  "University - PhD",
  "Professional Degree (ex. JD/MD)"
)] <- "grad_prof"

# Any non-blank demo_edu value that is not listed above stays NA in edu3.
# Report such values instead of letting them disappear silently.
edu_raw <- as.character(data$demo_edu)
edu_unmapped <- is.na(data$edu3) & !is.na(edu_raw) & nzchar(trimws(edu_raw))
if (any(edu_unmapped)) {
  warning(
    "demo_edu values not mapped to edu3 (left as NA): ",
    paste(unique(edu_raw[edu_unmapped]), collapse = "; "),
    call. = FALSE
  )
}

# Convert to ordered factor
data$edu3 <- factor(data$edu3, levels = edu3_levels, ordered = TRUE)

# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))

# Verify the recoding
print(table(data$demo_edu, data$edu3, useNA = "ifany"))

# Numeric version for correlations ----
# edu3 is already an ordered factor, so as.numeric() returns the level
# codes: HS_TS = 1, C_Ug = 2, grad_prof = 3 (NA stays NA).
data$edu_num <- as.numeric(data$edu3)

# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))

# Verify the conversion
cat("\nCross-tabulation of factor vs numeric:\n")
print(table(data$edu3, data$edu_num, useNA = "ifany"))

# Note: To save the dataset, close any programs that have ehi1.csv open,
# then run:
# write.csv(data, "ehi1.csv", row.names = FALSE)