# eohi/eohi3/dataREVIEW-JAN21/datap 03 - quotas.r
# Irina Levit ba54687da2 eohi3-updates (#3)
# updating eohi folder w/ third eohi exp.

# Reviewed-on: #3
# Co-authored-by: Irina Levit <irina.levit.rn@gmail.com>
# Co-committed-by: Irina Levit <irina.levit.rn@gmail.com>
# 2026-01-26 16:30:09 -05:00

# 131 lines
# 3.6 KiB
# R

library(dplyr)
# Work from the folder that holds the raw export.
# NOTE(review): hard-coded absolute path -- the script only runs on a machine
# where this directory exists.
setwd("/home/ladmin/Documents/DND/EOHI/eohi3/dataREVIEW-JAN21")

# Load the raw CSV.
#   check.names = FALSE   leaves the header text exactly as written in the file
#   na.strings = "NA"     only the literal text "NA" is read as missing; blank
#                         cells in text columns stay as empty strings
#                         (blank cells in numeric columns are still read as NA
#                         by read.csv -- see ?read.table)
df <- read.csv(
  "eohi3_raw.csv",
  na.strings = "NA",
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Drop every column whose header is blank or missing; dplyr verbs need all
# columns to be named.
empty_cols <- which(is.na(names(df)) | !nzchar(names(df)))
if (length(empty_cols) > 0L) {
  df <- df[, -empty_cols, drop = FALSE]
}

# Report settings: when save_to_doc is TRUE, the distributions are also
# written to the text file named by doc_filename.
save_to_doc <- TRUE
doc_filename <- "eohi3_quotas.txt"
# =============================================================================
# SINGLE VARIABLE DISTRIBUTIONS
# =============================================================================
# One-way frequency tables. sort = TRUE puts the most frequent value first.
# Each table is printed to the console right after it is built.

# Age
dist_age <- count(df, taq_age, sort = TRUE)
print(dist_age)

# Sex
dist_sex <- count(df, taq_sex, sort = TRUE)
print(dist_sex)

# Citizenship
dist_citizenship <- count(df, citizenship, sort = TRUE)
print(dist_citizenship)

# Group
dist_group <- count(df, group, sort = TRUE)
print(dist_group)

# temporalDO
dist_temporalDO <- count(df, temporalDO, sort = TRUE)
print(dist_temporalDO)

# Perspective
dist_perspective <- count(df, perspective, sort = TRUE)
print(dist_perspective)
# =============================================================================
# NESTED DISTRIBUTIONS
# =============================================================================
# Two-way frequency tables: counts of the second variable within each level of
# the first, ordered by the outer variable and then the inner one. Each table
# is printed to the console right after it is built.

# Age within citizenship
dist_age_citizenship <- arrange(
  count(df, citizenship, taq_age),
  citizenship, taq_age
)
print(dist_age_citizenship)

# Sex within citizenship
dist_sex_citizenship <- arrange(
  count(df, citizenship, taq_sex),
  citizenship, taq_sex
)
print(dist_sex_citizenship)

# Age within temporalDO
dist_age_temporalDO <- arrange(
  count(df, temporalDO, taq_age),
  temporalDO, taq_age
)
print(dist_age_temporalDO)

# Age within perspective
dist_age_perspective <- arrange(
  count(df, perspective, taq_age),
  perspective, taq_age
)
print(dist_age_perspective)

# Sex within temporalDO
dist_sex_temporalDO <- arrange(
  count(df, temporalDO, taq_sex),
  temporalDO, taq_sex
)
print(dist_sex_temporalDO)

# Sex within perspective
dist_sex_perspective <- arrange(
  count(df, perspective, taq_sex),
  perspective, taq_sex
)
print(dist_sex_perspective)
# =============================================================================
# OPTIONAL: SAVE ALL DISTRIBUTIONS TO DOCUMENT
# =============================================================================
# Optionally write every distribution table to a plain-text report.
#
# When save_to_doc is TRUE, console output is diverted to doc_filename with
# sink(), the tables are printed under their section headings, and the
# diversion is removed again before the confirmation message is shown.
#
# The writes run inside tryCatch(..., finally = sink()) so that the diversion
# is always removed, even if one of the print()/cat() calls fails; otherwise
# all later console output in the session would keep going into the file.
if (save_to_doc) {
  # local() keeps the helper and lookup lists out of the global environment.
  local({
    # Heading -> table, in the order they appear in the report.
    single_tables <- list(
      "Distribution of taq_age:" = dist_age,
      "Distribution of taq_sex:" = dist_sex,
      "Distribution of citizenship:" = dist_citizenship,
      "Distribution of group:" = dist_group,
      "Distribution of temporalDO:" = dist_temporalDO,
      "Distribution of perspective:" = dist_perspective
    )
    nested_tables <- list(
      "Age within Citizenship:" = dist_age_citizenship,
      "Sex within Citizenship:" = dist_sex_citizenship,
      "Age within temporalDO:" = dist_age_temporalDO,
      "Age within perspective:" = dist_age_perspective,
      "Sex within temporalDO:" = dist_sex_temporalDO,
      "Sex within perspective:" = dist_sex_perspective
    )

    # Print each heading followed by its table. Tables are separated by a
    # blank-line gap ("\n\n"); final_gap is what follows the last table.
    write_tables <- function(tables, final_gap) {
      headings <- names(tables)
      for (i in seq_along(tables)) {
        cat(headings[i], "\n", sep = "")
        print(tables[[i]])
        cat(if (i < length(tables)) "\n\n" else final_gap)
      }
    }

    # Opened outside tryCatch: if the file cannot be opened there is no
    # diversion to undo.
    sink(doc_filename)
    tryCatch(
      {
        cat("DISTRIBUTION REPORT\n")
        cat("==================\n\n")

        cat("SINGLE VARIABLE DISTRIBUTIONS\n")
        cat("------------------------------\n\n")
        write_tables(single_tables, final_gap = "\n\n")

        cat("NESTED DISTRIBUTIONS\n")
        cat("---------------------\n\n")
        write_tables(nested_tables, final_gap = "\n")
      },
      finally = sink()
    )
  })

  cat("Distributions saved to:", doc_filename, "\n")
}