# Quota / distribution report for the eohi3 raw data.
#
# Reads "eohi3_raw.csv", tabulates several single-variable and nested
# (two-variable) frequency distributions, prints them to the console and,
# when `save_to_doc` is TRUE, also writes them to a plain-text report file.

library(dplyr)

# NOTE(review): machine-specific absolute path. The input CSV and the report
# file are both resolved relative to this working directory, so the script only
# runs unchanged on a machine with this directory layout.
setwd("/home/ladmin/Documents/DND/EOHI/eohi3/dataREVIEW-JAN21")

# Read the data.
# - check.names = FALSE preserves the original column names.
# - na.strings = "NA" converts only the literal string "NA" to NA; empty cells
#   in character columns are kept as empty strings.
df <- read.csv(
  "eohi3_raw.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = "NA"
)

# Remove columns with empty or missing names (dplyr requires every column to
# have a name).
empty_cols <- which(is.na(names(df)) | names(df) == "")
if (length(empty_cols) > 0) {
  df <- df[, -empty_cols, drop = FALSE]
}

# Set to TRUE to save all distributions to a document file.
save_to_doc <- TRUE
doc_filename <- "eohi3_quotas.txt"

# =============================================================================
# SINGLE VARIABLE DISTRIBUTIONS
# =============================================================================

# Each table holds the number of rows per value, most frequent value first.
dist_age <- df %>% count(taq_age, sort = TRUE)
dist_sex <- df %>% count(taq_sex, sort = TRUE)
dist_citizenship <- df %>% count(citizenship, sort = TRUE)
dist_group <- df %>% count(group, sort = TRUE)
dist_temporalDO <- df %>% count(temporalDO, sort = TRUE)
dist_perspective <- df %>% count(perspective, sort = TRUE)

# Report title -> table, in the order they are printed and written.
single_dists <- list(
  "Distribution of taq_age:" = dist_age,
  "Distribution of taq_sex:" = dist_sex,
  "Distribution of citizenship:" = dist_citizenship,
  "Distribution of group:" = dist_group,
  "Distribution of temporalDO:" = dist_temporalDO,
  "Distribution of perspective:" = dist_perspective
)

for (tbl in single_dists) {
  print(tbl)
}

# =============================================================================
# NESTED DISTRIBUTIONS
# =============================================================================

# Each table holds the number of rows per combination of the two variables,
# ordered by the outer variable and then by the inner one.
dist_age_citizenship <- df %>%
  count(citizenship, taq_age) %>%
  arrange(citizenship, taq_age)

dist_sex_citizenship <- df %>%
  count(citizenship, taq_sex) %>%
  arrange(citizenship, taq_sex)

dist_age_temporalDO <- df %>%
  count(temporalDO, taq_age) %>%
  arrange(temporalDO, taq_age)

dist_age_perspective <- df %>%
  count(perspective, taq_age) %>%
  arrange(perspective, taq_age)

dist_sex_temporalDO <- df %>%
  count(temporalDO, taq_sex) %>%
  arrange(temporalDO, taq_sex)

dist_sex_perspective <- df %>%
  count(perspective, taq_sex) %>%
  arrange(perspective, taq_sex)

# Report title -> table, in the order they are printed and written.
nested_dists <- list(
  "Age within Citizenship:" = dist_age_citizenship,
  "Sex within Citizenship:" = dist_sex_citizenship,
  "Age within temporalDO:" = dist_age_temporalDO,
  "Age within perspective:" = dist_age_perspective,
  "Sex within temporalDO:" = dist_sex_temporalDO,
  "Sex within perspective:" = dist_sex_perspective
)

for (tbl in nested_dists) {
  print(tbl)
}

# =============================================================================
# OPTIONAL: SAVE ALL DISTRIBUTIONS TO DOCUMENT
# =============================================================================

# Write all distributions to a plain-text report.
#
# path:   file that receives the report (overwritten if it already exists).
# single: named list of single-variable tables; names are the section titles.
# nested: named list of nested tables; names are the section titles.
#
# Returns `path` invisibly.
write_distribution_report <- function(path, single, nested) {
  sink(path)
  # Restore console output even if printing one of the tables fails; without
  # this an error would leave all later output diverted into the report file.
  on.exit(sink(), add = TRUE)

  cat("DISTRIBUTION REPORT\n")
  cat("==================\n\n")

  cat("SINGLE VARIABLE DISTRIBUTIONS\n")
  cat("------------------------------\n\n")
  for (title in names(single)) {
    cat(title, "\n", sep = "")
    print(single[[title]])
    cat("\n\n")
  }

  cat("NESTED DISTRIBUTIONS\n")
  cat("---------------------\n\n")
  nested_titles <- names(nested)
  for (i in seq_along(nested_titles)) {
    # Entries are separated by a blank-line pair; the last entry is followed
    # by a single newline only (written after the loop).
    if (i > 1L) {
      cat("\n\n")
    }
    cat(nested_titles[[i]], "\n", sep = "")
    print(nested[[i]])
  }
  cat("\n")

  invisible(path)
}

if (save_to_doc) {
  write_distribution_report(doc_filename, single_dists, nested_dists)
  cat("Distributions saved to:", doc_filename, "\n")
}