# Source listing metadata: 118 lines, 5.2 KiB, R
# Apply a large penalty against scientific notation so numeric values are
# printed in fixed notation.
options(scipen = 999)

# Switch to the project directory when it exists on this machine. The path is
# specific to one computer; anywhere else the script keeps the current working
# directory instead of aborting inside setwd().
project_dir <- "C:/Users/irina/Documents/DND/EOHI/eohi2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message(
    "Project directory not found: ", project_dir,
    "\nUsing current working directory: ", getwd()
  )
}

# Load data
input_file <- "eohi2.csv"
if (!file.exists(input_file)) {
  # Name the directory that was searched so the failure is easy to diagnose.
  stop(
    "Cannot find '", input_file, "' in ", getwd(),
    ". Run this script from the folder that contains it.",
    call. = FALSE
  )
}
data <- read.csv(input_file)
# Create DGEN EHI difference variables (Past - Future) for different time
# intervals.
#
# For every interval (5, 10) and suffix (Pref, Pers, Val) this adds the column
#   ehiDGEN_<interval>_<suffix> =
#     DGEN_past_<interval>_<suffix> - DGEN_fut_<interval>_<suffix>
# Columns are created in the order 5_Pref, 5_Pers, 5_Val, 10_Pref, 10_Pers,
# 10_Val.
dgen_intervals <- c("5", "10")
dgen_suffixes <- c("Pref", "Pers", "Val")

# "5_Pref" "5_Pers" "5_Val" "10_Pref" "10_Pers" "10_Val"
dgen_keys <- paste(
  rep(dgen_intervals, each = length(dgen_suffixes)),
  dgen_suffixes,
  sep = "_"
)

# Fail early with a readable message when an input column is absent, rather
# than with a "replacement has 0 rows" error from the assignment below.
dgen_required <- c(
  paste0("DGEN_past_", dgen_keys),
  paste0("DGEN_fut_", dgen_keys)
)
dgen_missing <- setdiff(dgen_required, names(data))
if (length(dgen_missing) > 0) {
  stop(
    "Missing DGEN input column(s): ", paste(dgen_missing, collapse = ", "),
    call. = FALSE
  )
}

# [[ ]] matches column names exactly; `$` on a data frame partial-matches and
# could silently pick up a longer, similarly named column.
for (key in dgen_keys) {
  data[[paste0("ehiDGEN_", key)]] <-
    data[[paste0("DGEN_past_", key)]] - data[[paste0("DGEN_fut_", key)]]
}
# QA: Verify calculations - FIRST 5 ROWS with detailed output

# Return the check mark for one recomputed difference against the stored value.
#
# expected, actual: length-one numeric values (may be NA).
# Two missing values count as a match; exactly one missing value is a mismatch.
# Otherwise the values match when they differ by less than 1e-10.
dgen_row_mark <- function(expected, actual) {
  if (is.na(expected) || is.na(actual)) {
    matches <- is.na(expected) && is.na(actual)
  } else {
    # isTRUE() guards against a non-TRUE/FALSE comparison result (e.g. NaN).
    matches <- isTRUE(abs(expected - actual) < 1e-10)
  }
  if (matches) "✓" else "✗"
}

# Print the Pref / Pers / Val verification lines for one row and one interval.
#
# df:       data frame holding the DGEN_past_*, DGEN_fut_* and ehiDGEN_* columns.
# row:      integer row index to report.
# interval: interval label used in the column names ("5" or "10").
print_dgen_row_check <- function(df, row, interval) {
  cat(sprintf("\nRow %d:\n", row))
  for (suffix in c("Pref", "Pers", "Val")) {
    target <- paste0("ehiDGEN_", interval, "_", suffix)
    past <- df[[paste0("DGEN_past_", interval, "_", suffix)]][row]
    fut <- df[[paste0("DGEN_fut_", interval, "_", suffix)]][row]
    stored <- df[[target]][row]
    recomputed <- past - fut
    cat(sprintf(
      " %s: %g - %g = %g | %s = %g %s\n",
      suffix, past, fut, recomputed,
      target, stored,
      dgen_row_mark(recomputed, stored)
    ))
  }
}

cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")

# Report at most five rows; never index past the end of a shorter data set.
qa_rows <- seq_len(min(5L, nrow(data)))

cat("--- 5-YEAR DGEN VARIABLES ---\n")
for (i in qa_rows) {
  print_dgen_row_check(data, i, "5")
}

cat("\n--- 10-YEAR DGEN VARIABLES ---\n")
for (i in qa_rows) {
  print_dgen_row_check(data, i, "10")
}
# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

# Each entry names the past column, the future column and the stored
# difference column that should equal past - future.
qa_pairs <- list(
  # 5-year DGEN pairs
  list(npast = "DGEN_past_5_Pref", nfut = "DGEN_fut_5_Pref", target = "ehiDGEN_5_Pref"),
  list(npast = "DGEN_past_5_Pers", nfut = "DGEN_fut_5_Pers", target = "ehiDGEN_5_Pers"),
  list(npast = "DGEN_past_5_Val", nfut = "DGEN_fut_5_Val", target = "ehiDGEN_5_Val"),

  # 10-year DGEN pairs
  list(npast = "DGEN_past_10_Pref", nfut = "DGEN_fut_10_Pref", target = "ehiDGEN_10_Pref"),
  list(npast = "DGEN_past_10_Pers", nfut = "DGEN_fut_10_Pers", target = "ehiDGEN_10_Pers"),
  list(npast = "DGEN_past_10_Val", nfut = "DGEN_fut_10_Val", target = "ehiDGEN_10_Val")
)

all_checks_passed <- TRUE

for (pair in qa_pairs) {
  # An absent column would make the comparison zero-length and be reported as
  # PASS, so treat it as an explicit failure.
  absent_cols <- setdiff(c(pair$npast, pair$nfut, pair$target), names(data))
  if (length(absent_cols) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf(" Missing column(s): %s\n", paste(absent_cols, collapse = ", ")))
    all_checks_passed <- FALSE
    next
  }

  # Calculate expected difference
  expected_diff <- data[[pair$npast]] - data[[pair$nfut]]

  # Get actual value in target variable
  actual_value <- data[[pair$target]]

  # Compare (allowing for floating point precision issues). which() drops NA
  # comparisons, so a row where only one side is missing is flagged separately;
  # rows where both sides are missing are treated as consistent.
  na_mismatch <- is.na(expected_diff) != is.na(actual_value)
  value_mismatch <- abs(expected_diff - actual_value) > 1e-10
  discrepancies <- which(na_mismatch | value_mismatch)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf(
      " Found %d discrepancies in rows: %s\n",
      length(discrepancies),
      paste(head(discrepancies, 10), collapse = ", ")
    ))

    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf(
      " Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
      row_num,
      pair$npast, data[[pair$npast]][row_num],
      pair$nfut, data[[pair$nfut]][row_num],
      expected_diff[row_num],
      pair$target, actual_value[row_num]
    ))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}
# Write the data frame, including the ehiDGEN_* columns, back to the CSV it
# was read from (row names are omitted), then confirm on the console.
output_file <- "eohi2.csv"
write.csv(x = data, file = output_file, row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")