eohi/.history/eohi1/datap 15 - education recoded 3 ordinal levels_20251027115834.r
2025-12-23 15:47:09 -05:00

50 lines
1.8 KiB
R

# ---- Setup and data load ----
# A large scipen penalty makes R strongly prefer fixed over scientific notation
# when printing numbers.
options(scipen = 999)
# NOTE(review): hard-coded, machine-specific working directory. The relative
# file name below is resolved against it, so the script only runs as-is on this
# machine; consider a project-relative path.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
data <- read.csv("ehi1.csv")

# ---- Inspect the raw education variable ----
# Distinct non-missing values of demo_edu.
print(levels(factor(data$demo_edu)))
# Frequency of every value, with NA counted when present.
# The heading is written with cat(): print() on a string displays the "\n"
# escape literally (and adds the [1] prefix and quotes) rather than emitting a
# blank line.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))
# ---- Recode demo_edu into 3 ordinal levels ----
# Each target level lists the raw demo_edu labels that collapse into it:
#   HS_TS     - High School and Trade School
#   C_Ug      - College and University - Undergraduate
#   grad_prof - University - Graduate, University - PhD, Professional Degree
# The order of this list is also the low-to-high order of the factor levels.
edu_groups <- list(
  HS_TS = c("High School (or equivalent)", "Trade School (non-military)"),
  C_Ug = c("College Diploma/Certificate", "University - Undergraduate"),
  grad_prof = c(
    "University - Graduate (Masters)",
    "University - PhD",
    "Professional Degree (ex. JD/MD)"
  )
)

# Start from all-missing; rows whose label is not listed above stay NA.
data$edu3 <- NA_character_
for (edu_level in names(edu_groups)) {
  data$edu3[data$demo_edu %in% edu_groups[[edu_level]]] <- edu_level
}

# Matching is exact, so a typo, stray whitespace, or a new category would
# otherwise turn into NA without any notice. Report such labels explicitly.
edu_seen <- unique(as.character(data$demo_edu))
edu_unmapped <- setdiff(
  edu_seen[!is.na(edu_seen)],
  unlist(edu_groups, use.names = FALSE)
)
if (length(edu_unmapped) > 0) {
  warning(
    "demo_edu values not mapped to edu3 (left as NA): ",
    paste0("'", edu_unmapped, "'", collapse = ", "),
    call. = FALSE
  )
}

# Convert to ordered factor (HS_TS < C_Ug < grad_prof)
data$edu3 <- factor(data$edu3, levels = names(edu_groups), ordered = TRUE)

# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))
# Verify the recoding: each raw label should fall in exactly one edu3 column
print(table(data$demo_edu, data$edu3, useNA = "ifany"))
# ---- Numeric coding of edu3 (1, 2, 3) for correlations ----
# Re-declare edu3 as an ordered factor with the intended level order. This is
# a no-op when edu3 already has exactly these levels, and guards the numeric
# codes below if edu3 arrives here as plain character.
data$edu3 <- factor(
  data$edu3,
  levels = c("HS_TS", "C_Ug", "grad_prof"),
  ordered = TRUE
)
# as.numeric() on a factor returns the position of each value in levels(), so
# HS_TS -> 1, C_Ug -> 2, grad_prof -> 3; NA stays NA.
data$edu_num <- as.numeric(data$edu3)

# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion: counts should fall only on the HS_TS/1, C_Ug/2,
# grad_prof/3 diagonal (plus NA/NA).
# The heading is written with cat(): print() would display the "\n" escape
# literally instead of emitting a blank line.
cat("\nCross-tabulation of factor vs numeric:\n")
print(table(data$edu3, data$edu_num, useNA = "ifany"))

# Note: To save the dataset, close any programs that have ehi1.csv open, then run:
# write.csv(data, "ehi1.csv", row.names = FALSE)