# NOTE: file-viewer metadata captured during extraction (not part of the
# script): 140 lines, 5.6 KiB, R
# Strongly prefer fixed over scientific notation when numbers are formatted
options(scipen = 999)

# Work from the project directory so the relative file name below resolves
project_dir <- "C:/Users/irina/Documents/DND/EOHI/eohi2"
setwd(project_dir)

# Load data
input_file <- "eohi2.csv"
data <- read.csv(file = input_file)

# Calculate global mean scores for EHI variables across time intervals.
# Each entry maps the new column (list name) to the three source columns
# whose row-wise mean it holds. Missing values are ignored (na.rm = TRUE).
global_mean_specs <- list(
  # === DGEN 5-YEAR GLOBAL MEAN ===
  ehiDGEN_5_mean = c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
  # === DGEN 10-YEAR GLOBAL MEAN ===
  ehiDGEN_10_mean = c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
  # === 5-YEAR GLOBAL MEAN ===
  ehi5_global_mean = c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
  # === 10-YEAR GLOBAL MEAN ===
  ehi10_global_mean = c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
  # === 5-10 YEAR CHANGE GLOBAL MEAN ===
  ehi5.10_global_mean = c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN",
                          "ehi5.10_val_MEAN")
)

# Columns are added (or overwritten) in the order listed above
for (target_col in names(global_mean_specs)) {
  source_cols <- global_mean_specs[[target_col]]
  data[[target_col]] <- rowMeans(data[, source_cols], na.rm = TRUE)
}

# QA: Verify mean calculations

# Print a row-by-row comparison of a recomputed mean against a stored column.
#
# For each of the first `max_rows` rows of `df` (fewer if `df` is shorter),
# recompute the mean of the `vars` columns with missing values removed and
# compare it with the value stored in the `target` column.
#
# Args:
#   df:        data frame holding both the source and target columns.
#   vars:      character vector of source column names.
#   target:    name of the column holding the stored mean.
#   value_fmt: sprintf format used for each source value (e.g. "%g").
#   max_rows:  maximum number of rows to print.
#   tol:       absolute tolerance for treating the two means as equal.
#
# Returns: NULL, invisibly; called for its printed output.
print_row_qa <- function(df, vars, target, value_fmt,
                         max_rows = 5L, tol = 1e-10) {
  row_fmt <- paste0(
    "Row %d: [",
    paste(rep(value_fmt, length(vars)), collapse = ", "),
    "] → Calculated: %.5f | Actual: %.5f %s\n"
  )

  # seq_len() guards against data sets with fewer than max_rows rows
  for (i in seq_len(min(max_rows, nrow(df)))) {
    vals <- unlist(df[i, vars], use.names = FALSE)
    calc_mean <- mean(vals, na.rm = TRUE)
    actual_mean <- df[[target]][i]

    # When every source value is missing both means are NaN/NA; that is a
    # match. A comparison that yields NA (only one side missing) is a
    # mismatch, never a printed "NA".
    both_missing <- is.na(calc_mean) && is.na(actual_mean)
    is_match <- both_missing || isTRUE(abs(calc_mean - actual_mean) < tol)
    mark <- if (is_match) "✓" else "✗"

    fmt_args <- c(
      list(row_fmt, i),
      as.list(vals),
      list(calc_mean, actual_mean, mark)
    )
    cat(do.call(sprintf, fmt_args))
  }

  invisible(NULL)
}

cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI global mean calculations\n\n")

cat("--- FIRST 5 ROWS: DGEN 5-YEAR GLOBAL MEAN ---\n")
print_row_qa(
  data,
  vars = c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
  target = "ehiDGEN_5_mean",
  value_fmt = "%g"
)

cat("\n--- FIRST 5 ROWS: DGEN 10-YEAR GLOBAL MEAN ---\n")
print_row_qa(
  data,
  vars = c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
  target = "ehiDGEN_10_mean",
  value_fmt = "%g"
)

cat("\n--- FIRST 5 ROWS: 5-YEAR GLOBAL MEAN ---\n")
print_row_qa(
  data,
  vars = c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
  target = "ehi5_global_mean",
  value_fmt = "%.5f"
)

cat("\n--- FIRST 5 ROWS: 10-YEAR GLOBAL MEAN ---\n")
print_row_qa(
  data,
  vars = c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
  target = "ehi10_global_mean",
  value_fmt = "%.5f"
)

cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE GLOBAL MEAN ---\n")
print_row_qa(
  data,
  vars = c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"),
  target = "ehi5.10_global_mean",
  value_fmt = "%.5f"
)

# Overall QA check for all rows: recompute every global mean from its source
# columns and compare it with the stored column across the whole data set.
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Each check lists the source columns (vars), the stored mean column (target)
# and a label (name) used in the printed report.
qa_checks <- list(
  # DGEN global means
  list(vars = c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
       target = "ehiDGEN_5_mean", name = "DGEN 5-Year Global"),
  list(vars = c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
       target = "ehiDGEN_10_mean", name = "DGEN 10-Year Global"),

  # Domain-specific global means
  list(vars = c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
       target = "ehi5_global_mean", name = "5-Year Global"),
  list(vars = c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
       target = "ehi10_global_mean", name = "10-Year Global"),
  list(vars = c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"),
       target = "ehi5.10_global_mean", name = "5-10 Year Change Global")
)

all_checks_passed <- TRUE
qa_tolerance <- 1e-10

for (check in qa_checks) {
  # A missing target column would otherwise give a zero-length comparison
  # and a false PASS, so report it as a failure up front.
  missing_cols <- setdiff(c(check$vars, check$target), names(data))
  if (length(missing_cols) > 0) {
    cat(sprintf("FAIL: %s mean (missing columns: %s)\n",
                check$name, paste(missing_cols, collapse = ", ")))
    all_checks_passed <- FALSE
    next
  }

  calc_mean <- rowMeans(data[, check$vars, drop = FALSE], na.rm = TRUE)
  actual_mean <- data[[check$target]]

  # A row is a discrepancy when exactly one side is missing, or when both
  # are present and differ by more than the tolerance. Comparing with `>`
  # alone would yield NA for missing values, which which() silently drops.
  calc_missing <- is.na(calc_mean)
  actual_missing <- is.na(actual_mean)
  missing_mismatch <- xor(calc_missing, actual_missing)
  value_mismatch <- !calc_missing & !actual_missing &
    abs(calc_mean - actual_mean) > qa_tolerance
  discrepancies <- which(missing_mismatch | value_mismatch)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s mean (n_vars = %d)\n",
                check$name, length(check$vars)))
    # Only the first 10 offending row numbers are listed
    cat(sprintf(" Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                check$name, length(check$vars), nrow(data)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (written without row names, under the same file name
# the data was read from)
output_file <- "eohi2.csv"
write.csv(x = data, file = output_file, row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")