# Create DGEN EHI difference variables (Past - Future) for the 5- and 10-year
# intervals, run QA checks on the computed columns, and save the dataset.
#
# Input / output: eohi2.csv in the working directory (overwritten in place).

options(scipen = 999)

# NOTE(review): machine-specific working directory, kept unchanged so the
# script keeps reading and writing the same file as before.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# ---- Helpers ----------------------------------------------------------------

# Build a list of (npast, nfut, target) column-name triples, one per suffix.
build_pairs <- function(suffixes, past_prefix, fut_prefix, target_prefix) {
  lapply(suffixes, function(suffix) {
    list(
      npast = paste0(past_prefix, suffix),
      nfut = paste0(fut_prefix, suffix),
      target = paste0(target_prefix, suffix)
    )
  })
}

# Column-name triples for the DGEN variables of one time interval ("5", "10").
dgen_pairs_for <- function(horizon) {
  build_pairs(
    dgen_domains,
    paste0("DGEN_past_", horizon, "_"),
    paste0("DGEN_fut_", horizon, "_"),
    paste0("ehiDGEN_", horizon, "_")
  )
}

# Element-wise agreement of two numeric vectors: TRUE when both values are
# missing or when they differ by at most `tol`. Never returns NA, so a value
# that is missing on one side only is reported as a mismatch.
values_agree <- function(expected, actual, tol = 1e-10) {
  gap <- abs(expected - actual)
  within_tol <- !is.na(gap) & gap <= tol
  both_na <- is.na(expected) & is.na(actual)
  within_tol | both_na
}

# ---- Create DGEN EHI difference variables (Past - Future) --------------------

dgen_horizons <- c("5", "10")
dgen_domains <- c("Pref", "Pers", "Val")

dgen_pairs <- do.call(c, lapply(dgen_horizons, dgen_pairs_for))

# Fail early with a clear message instead of a cryptic
# "replacement has 0 rows" error when an input column is absent.
dgen_inputs <- unlist(lapply(dgen_pairs, function(p) c(p$npast, p$nfut)))
missing_inputs <- setdiff(dgen_inputs, names(data))
if (length(missing_inputs) > 0) {
  stop(
    "Missing DGEN input column(s) in eohi2.csv: ",
    paste(missing_inputs, collapse = ", "),
    call. = FALSE
  )
}

for (pair in dgen_pairs) {
  data[[pair$target]] <- data[[pair$npast]] - data[[pair$nfut]]
}

# ---- QA: verify calculations - first 5 rows with detailed output -------------

cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")

# seq_len() keeps the preview inside the data when it has fewer than 5 rows.
preview_rows <- seq_len(min(5L, nrow(data)))

for (horizon in dgen_horizons) {
  # Only the headers after the first one are preceded by a blank line.
  if (horizon != dgen_horizons[1]) {
    cat("\n")
  }
  cat(sprintf("--- %s-YEAR DGEN VARIABLES ---\n", horizon))

  horizon_pairs <- dgen_pairs_for(horizon)

  for (i in preview_rows) {
    cat(sprintf("\nRow %d:\n", i))
    for (k in seq_along(dgen_domains)) {
      pair <- horizon_pairs[[k]]
      past_value <- data[[pair$npast]][i]
      fut_value <- data[[pair$nfut]][i]
      stored_value <- data[[pair$target]][i]
      mark <- if (values_agree(past_value - fut_value, stored_value)) {
        "✓"
      } else {
        "✗"
      }
      cat(sprintf(
        " %s: %g - %g = %g | %s = %g %s\n",
        dgen_domains[k], past_value, fut_value, past_value - fut_value,
        pair$target, stored_value, mark
      ))
    }
  }
}

# ---- Full QA check for all rows and all variables ----------------------------

cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

item_names <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious",
  "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

qa_pairs <- c(
  # DGEN pairs computed by this script
  dgen_pairs,
  # 5-year pairs
  build_pairs(item_names, "NPast_5_", "NFut_5_", "ehi5_"),
  # 10-year pairs
  build_pairs(item_names, "NPast_10_", "NFut_10_", "ehi10_"),
  # 5-10 year change pairs
  build_pairs(item_names, "X5.10past_", "X5.10fut_", "ehi5.10_")
)

all_checks_passed <- TRUE

for (pair in qa_pairs) {
  # A pair whose columns are absent cannot be verified; report it rather than
  # letting zero-length arithmetic produce a vacuous PASS.
  missing_cols <- setdiff(unlist(pair), names(data))
  if (length(missing_cols) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf(
      " Missing column(s): %s\n",
      paste(missing_cols, collapse = ", ")
    ))
    all_checks_passed <- FALSE
    next
  }

  # Calculate expected difference
  expected_diff <- data[[pair$npast]] - data[[pair$nfut]]

  # Get actual value in target variable
  actual_value <- data[[pair$target]]

  # Compare (allowing for floating point precision issues); a value missing
  # on one side only counts as a discrepancy.
  discrepancies <- which(!values_agree(expected_diff, actual_value))

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf(
      " Found %d discrepancies in rows: %s\n",
      length(discrepancies),
      paste(head(discrepancies, 10), collapse = ", ")
    ))

    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf(
      " Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
      row_num,
      pair$npast, data[[pair$npast]][row_num],
      pair$nfut, data[[pair$nfut]][row_num],
      expected_diff[row_num],
      pair$target, actual_value[row_num]
    ))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# ---- Save updated dataset ----------------------------------------------------

write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")