# Prepare the education data: read ehi1.csv, keep the two outcome means and
# demo_edu, put the education factor in a fixed order, and append one 0/1
# indicator column per education level to `data`.

# Print numbers in fixed rather than scientific notation.
options(scipen = 999)

library(dplyr)

# NOTE(review): machine-specific absolute path. Kept as-is because code outside
# this section may rely on the working directory; consider a project-relative
# path instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>%
  mutate(demo_edu = as.factor(demo_edu))

# examine data object
str(data)
colSums(is.na(data))
sapply(data, class)
levels(data$demo_edu)

# Order in which the education levels should appear in the factor.
edu_levels <- c(
  "High School (or equivalent)",
  "Trade School (non-military)",
  "College Diploma/Certificate",
  "University - Undergraduate",
  "University - Graduate (Masters)",
  "University - PhD",
  "Professional Degree (ex. JD/MD)"
)

# factor() silently turns every label that is not listed in `levels` into NA,
# so report unexpected labels before they disappear.
unexpected_edu <- setdiff(levels(data$demo_edu), edu_levels)
if (length(unexpected_edu) > 0) {
  warning(
    "demo_edu contains labels not in edu_levels (they will become NA): ",
    paste(dQuote(unexpected_edu, q = FALSE), collapse = ", "),
    call. = FALSE
  )
}

data$demo_edu <- factor(data$demo_edu, levels = edu_levels)
levels(data$demo_edu)

# Create dummy variables.
# `- 1` removes the intercept, so every level (including the first) gets its
# own indicator column. The model frame is built with na.pass so that rows with
# a missing demo_edu are kept (as NA indicators) instead of being dropped, which
# would make the cbind() below fail on mismatched row counts.
edu_frame <- model.frame(~ demo_edu, data = data, na.action = na.pass)
dummy_vars <- model.matrix(~ demo_edu - 1, data = edu_frame)
dummy_df <- as.data.frame(dummy_vars)
stopifnot(nrow(dummy_df) == nrow(data))

# Rename columns with meaningful names, in the same order as edu_levels.
# All seven indicators are kept here; nothing is dropped in this section.
colnames(dummy_df) <- c(
  "edu_highschool", # intended reference level; leave out of regression models
  "edu_trade",
  "edu_college",
  "edu_uni_undergrad",
  "edu_uni_masters",
  "edu_uni_phd",
  "edu_prof"
)

# Add to your data
data <- cbind(data, dummy_df)