# Commit note: updating eohi folder w/ third eohi exp. Reviewed-on: #3
# Co-authored-by: Irina Levit <irina.levit.rn@gmail.com>
# Co-committed-by: Irina Levit <irina.levit.rn@gmail.com>
# File info: 40 lines, 1.5 KiB, R
library(dplyr)
|
|
|
|
# Populate the `citizenship` column of the EOHI3 raw export from the two
# source columns `taq_cit_1` and `taq_cit_2`:
#   - both columns filled -> "Both"
#   - exactly one filled  -> that column's value
#   - neither filled      -> the existing `citizenship` value is left untouched
# NOTE(review): the result is written back over the input file, so the raw
# CSV is modified in place.

# Build explicit paths instead of calling setwd(), so running this script does
# not change the working directory of the calling session.
data_dir <- "/home/ladmin/Documents/DND/EOHI/eohi3/dataREVIEW-JAN21"
raw_path <- file.path(data_dir, "eohi3_raw.csv")

# Read the data.
# - check.names = FALSE preserves the original column names.
# - na.strings = "NA" turns only the literal text NA into a missing value;
#   blank cells in text columns are read as "". Blank cells in columns that
#   read.csv types as logical/numeric still arrive as NA, which is why the
#   taq_cit columns are normalised below.
df <- read.csv(
  raw_path,
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = "NA"
)

# Fail early, with a readable message, if a source column is absent.
source_cols <- c("taq_cit_1", "taq_cit_2")
missing_cols <- setdiff(source_cols, names(df))
if (length(missing_cols) > 0) {
  stop(
    "Missing required column(s) in ", raw_path, ": ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Ensure the citizenship column exists; initialise it with empty strings.
# character(n) yields n empty strings and also works for a zero-row file.
if (!"citizenship" %in% names(df)) {
  df[["citizenship"]] <- character(nrow(df))
}

# Convert NA to "" in the source columns so the comparisons below never
# produce NA. `[[` is used throughout because `$` partially matches names.
for (col in source_cols) {
  values <- df[[col]]
  values[is.na(values)] <- ""
  df[[col]] <- values
}

# Per-row flags: does each source column hold a (non-empty) value?
has_cit1 <- df[["taq_cit_1"]] != ""
has_cit2 <- df[["taq_cit_2"]] != ""

both_have_values <- has_cit1 & has_cit2
only_cit1 <- has_cit1 & !has_cit2
only_cit2 <- has_cit2 & !has_cit1

# Assign values. Rows where neither source column has a value keep their
# original citizenship (which may be an empty string).
citizenship <- df[["citizenship"]]
citizenship[both_have_values] <- "Both"
citizenship[only_cit1] <- df[["taq_cit_1"]][only_cit1]
citizenship[only_cit2] <- df[["taq_cit_2"]][only_cit2]
df[["citizenship"]] <- citizenship

# Write back over the input file; remaining NA values are emitted as empty
# cells and character fields are quoted.
write.csv(df, raw_path, row.names = FALSE, na = "", quote = TRUE)
|