# eohi/eohi2/dataP 11 - CORRECT ehi vars.r
# 2026-01-22 17:55:35 -05:00
#
# 235 lines
# 14 KiB
# R

# Penalise scientific notation heavily so numbers are printed in fixed form.
options(scipen = 999)
# Switch to the original project folder only when it exists on this machine;
# otherwise keep the current working directory so the script can still be run
# from a checkout located elsewhere.
project_dir <- "C:/Users/irina/Documents/DND/EOHI/eohi2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message(
    "Project directory not found (", project_dir, "); ",
    "using current working directory: ", getwd()
  )
}
# Load data
# Fail early with a clear message instead of a low-level connection error.
if (!file.exists("eohi2.csv")) {
  stop("Cannot find eohi2.csv in ", getwd(), call. = FALSE)
}
data <- read.csv("eohi2.csv")
# Create EHI difference variables (past minus future) for each time interval.
#
# For every item below and every interval, the new column is
#   <target prefix><item> = <past prefix><item> - <future prefix><item>
# Intervals and their column prefixes:
#   5-year pairs:     NPast_5_   - NFut_5_    -> ehi5_
#   10-year pairs:    NPast_10_  - NFut_10_   -> ehi10_
#   5-10 year change: X5.10past_ - X5.10fut_  -> ehi5.10_
# NOTE(review): the leading "X" on the 5-10 year source columns is presumably
# added by read.csv() because the raw header starts with a digit - confirm
# against the raw file.

#' Add the EHI difference columns to a data frame
#'
#' @param df A data frame containing every past/future source column listed
#'   in the header comment above.
#' @return `df` with the 45 `ehi*` columns added (or overwritten if they
#'   already exist), in interval-then-item order.
add_ehi_differences <- function(df) {
  items <- c(
    # Preferences
    "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
    # Personality
    "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious",
    "pers_complex",
    # Values
    "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
  )
  horizons <- list(
    c(past = "NPast_5_", fut = "NFut_5_", target = "ehi5_"),
    c(past = "NPast_10_", fut = "NFut_10_", target = "ehi10_"),
    c(past = "X5.10past_", fut = "X5.10fut_", target = "ehi5.10_")
  )

  # Validate up front so a missing or misspelled source column produces a
  # readable error rather than a "replacement has 0 rows" failure.
  required <- unlist(lapply(horizons, function(h) {
    c(paste0(h[["past"]], items), paste0(h[["fut"]], items))
  }))
  missing_cols <- setdiff(required, names(df))
  if (length(missing_cols) > 0) {
    stop(
      "Missing source column(s): ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # `[[` matches column names exactly; `$` would partially match a longer
  # column name and silently use the wrong source variable.
  for (h in horizons) {
    for (item in items) {
      df[[paste0(h[["target"]], item)]] <-
        df[[paste0(h[["past"]], item)]] - df[[paste0(h[["fut"]], item)]]
    }
  }
  df
}

data <- add_ehi_differences(data)
# QA: Verify calculations - FIRST 5 ROWS with detailed output
# For three representative items per time interval, print
#   past - future = recomputed difference | stored ehi value, and a mark:
#   a check mark when the stored value agrees with the recomputed difference
#   (within 1e-10, or both missing), a cross otherwise.
cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")
spot_items <- c("pref_read", "pref_music", "pers_extravert")
spot_sets <- list(
  list(
    header = "--- 5-YEAR VARIABLES ---\n",
    past = "NPast_5_", fut = "NFut_5_", target = "ehi5_"
  ),
  list(
    header = "\n--- 10-YEAR VARIABLES ---\n",
    past = "NPast_10_", fut = "NFut_10_", target = "ehi10_"
  ),
  list(
    header = "\n--- 5-10 YEAR CHANGE VARIABLES ---\n",
    past = "X5.10past_", fut = "X5.10fut_", target = "ehi5.10_"
  )
)
# Never index past the end of the data: show at most 5 rows, fewer if the
# dataset is shorter (1:5 would print phantom NA rows).
spot_rows <- seq_len(min(5L, nrow(data)))
for (spot_set in spot_sets) {
  cat(spot_set[["header"]])
  for (i in spot_rows) {
    cat(sprintf("\nRow %d:\n", i))
    for (item in spot_items) {
      target_col <- paste0(spot_set[["target"]], item)
      past_value <- data[[paste0(spot_set[["past"]], item)]][i]
      fut_value <- data[[paste0(spot_set[["fut"]], item)]][i]
      expected <- past_value - fut_value
      actual <- data[[target_col]][i]
      # Two missing values agree; otherwise require a numeric match within
      # tolerance. isTRUE() turns an NA comparison into "no match" so the
      # mark is always a check or a cross, never the string "NA".
      both_missing <- isTRUE(is.na(expected)) && isTRUE(is.na(actual))
      agrees <- both_missing || isTRUE(abs(expected - actual) < 1e-10)
      mark <- if (agrees) "✓" else "✗"
      cat(sprintf(
        " %s: %g - %g = %g | %s = %g %s\n",
        item, past_value, fut_value, expected, target_col, actual, mark
      ))
    }
  }
}
# Full QA check for all rows and all variables
# Recomputes past - future for every item and time interval and compares it
# with the stored ehi column. A row counts as a discrepancy when
#   * exactly one of (recomputed, stored) is missing, or
#   * both are present and differ by more than the tolerance.
# A pair whose columns are absent from `data` is reported as FAIL rather than
# being compared as zero-length vectors (which would look like a PASS).
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")
qa_items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious",
  "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)
qa_prefixes <- list(
  # 5-year pairs
  list(npast = "NPast_5_", nfut = "NFut_5_", target = "ehi5_"),
  # 10-year pairs
  list(npast = "NPast_10_", nfut = "NFut_10_", target = "ehi10_"),
  # 5-10 year change pairs
  list(npast = "X5.10past_", nfut = "X5.10fut_", target = "ehi5.10_")
)
# One list(npast, nfut, target) entry per item and interval, ordered by
# interval and then by item.
qa_pairs <- unlist(
  lapply(qa_prefixes, function(prefix) {
    lapply(qa_items, function(item) {
      list(
        npast = paste0(prefix$npast, item),
        nfut = paste0(prefix$nfut, item),
        target = paste0(prefix$target, item)
      )
    })
  }),
  recursive = FALSE
)
# Absolute tolerance that absorbs floating point round-off.
qa_tolerance <- 1e-10
all_checks_passed <- TRUE
for (pair in qa_pairs) {
  absent_cols <- setdiff(c(pair$npast, pair$nfut, pair$target), names(data))
  if (length(absent_cols) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf(" Missing column(s): %s\n",
                paste(absent_cols, collapse = ", ")))
    all_checks_passed <- FALSE
    next
  }
  # Calculate expected difference
  expected_diff <- data[[pair$npast]] - data[[pair$nfut]]
  # Get actual value in target variable
  actual_value <- data[[pair$target]]
  # which() drops NA comparisons, so missingness is compared explicitly
  # before the numeric tolerance test.
  na_mismatch <- is.na(expected_diff) != is.na(actual_value)
  value_mismatch <- !is.na(expected_diff) & !is.na(actual_value) &
    abs(expected_diff - actual_value) > qa_tolerance
  discrepancies <- which(na_mismatch | value_mismatch)
  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf(" Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))
    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
                row_num,
                pair$npast, data[[pair$npast]][row_num],
                pair$nfut, data[[pair$nfut]][row_num],
                expected_diff[row_num],
                pair$target, actual_value[row_num]))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data)))
  }
}
cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}
# Save updated dataset
# The output file is the same file the data were read from, so a bad write
# cannot be undone by re-running the script. Refuse to overwrite it when the
# QA section flagged a problem.
if (!isTRUE(all_checks_passed)) {
  stop(
    "QA checks failed; eohi2.csv was NOT overwritten. ",
    "Review the QA output above.",
    call. = FALSE
  )
}
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")