# Populate the `citizenship` column of eohi3_raw.csv from the `taq_cit_1` and
# `taq_cit_2` columns, then write the result back to the same file.
#
# Rules:
#   * both taq_cit columns have a value  -> citizenship is "Both"
#   * exactly one of them has a value    -> citizenship takes that value
#   * neither has a value                -> citizenship keeps its current value

library(dplyr)

# NOTE(review): hard-coded, machine-specific working directory. Kept so the
# script reads and writes exactly the same file as before; consider passing the
# data directory in (or using file.path()) instead of changing the session's
# working directory.
setwd("/home/ladmin/Documents/DND/EOHI/eohi3/dataREVIEW-JAN21")

# Read the data ----
# check.names = FALSE preserves the original column names.
# na.strings = "NA" converts only the literal string "NA" to NA, so empty cells
# in character columns stay as empty strings rather than becoming NA.
df <- read.csv(
  "eohi3_raw.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = "NA"
)

# Fail early, with a clear message, if the source columns are absent. Without
# this check the NA replacement below stops with an opaque
# "replacement has 0 rows" error.
required_cols <- c("taq_cit_1", "taq_cit_2")
missing_cols <- setdiff(required_cols, names(df))
if (length(missing_cols) > 0) {
  stop(
    "eohi3_raw.csv is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Ensure the citizenship column exists; initialise with empty strings if not.
if (!"citizenship" %in% names(df)) {
  df$citizenship <- ""
}

# Normalise the source columns ----
# Convert NA to "" so the comparisons below never yield NA (an NA in a logical
# index used for assignment would raise an error).
df$taq_cit_1[is.na(df$taq_cit_1)] <- ""
df$taq_cit_2[is.na(df$taq_cit_2)] <- ""

# Classify rows ----
# Both columns have a (non-empty) value.
both_have_values <- df$taq_cit_1 != "" & df$taq_cit_2 != ""
# Only taq_cit_1 has a value.
only_cit1 <- df$taq_cit_1 != "" & df$taq_cit_2 == ""
# Only taq_cit_2 has a value.
only_cit2 <- df$taq_cit_2 != "" & df$taq_cit_1 == ""

# Assign citizenship ----
df$citizenship[both_have_values] <- "Both"
df$citizenship[only_cit1] <- df$taq_cit_1[only_cit1]
df$citizenship[only_cit2] <- df$taq_cit_2[only_cit2]
# Rows where neither column has a value keep their original citizenship value
# (which may be an empty string).

# Write the result ----
# NOTE(review): this overwrites the raw input file in place; NA values are
# written as empty cells (na = "").
write.csv(df, "eohi3_raw.csv", row.names = FALSE, na = "", quote = TRUE)