eohi/eohi2/dataP 05 - recode scales VARS.r
2026-01-22 17:55:35 -05:00

299 lines
9.6 KiB
R

# Compute the AOT and CRT scale scores for eohi2.csv.
#   AOT: items 4-7 are reverse coded, then all 8 items are averaged.
#   CRT: proportion of correct and proportion of intuitive responses.

# Packages
library(dplyr)

# All relative file names below resolve against the project folder.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load the data set.
#   check.names = FALSE keeps the CSV headers exactly as written.
#   na.strings = NULL is intended to leave empty cells as empty strings
#   instead of turning them into NA.
df <- read.csv(
  "eohi2.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = NULL
)

# Source item columns: aot_1 ... aot_8 and crt_1 ... crt_3
aot_cols <- paste0("aot_", 1:8)
crt_cols <- paste0("crt_", 1:3)

# Score columns this script fills in
target_cols <- c("aot_total", "crt_correct", "crt_int")

# CRT answer keys; element j belongs to crt_cols[j]
crt_correct_answers <- c("5 cents", "5 minutes", "47 days")
crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days")
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Headers of the loaded data, with surrounding whitespace removed.
# NOTE(review): this check matches against trimmed headers, while the
# processing steps look columns up via the untrimmed names(df) - confirm the
# CSV headers carry no stray whitespace.
df_cols <- trimws(names(df))

# Prints the expected / found / missing summary for one group of columns and
# returns the split as list(missing = ..., existing = ...).
#   title     - heading line, printed verbatim
#   expected  - character vector of column names that should be present
#   label     - group name used in the "Missing ... columns" line
#   available - character vector of column names that are present
report_column_group <- function(title, expected, label, available) {
  is_present <- expected %in% available
  absent <- expected[!is_present]
  cat(title)
  cat(" Expected:", length(expected), "columns\n")
  cat(" Found:", sum(is_present), "columns\n")
  cat(" Missing:", length(absent), "columns\n")
  if (length(absent) > 0) {
    cat("\n Missing ", label, " columns:\n", sep = "")
    cat(paste0(" - ", absent, " \n"), sep = "")
  }
  list(missing = absent, existing = expected[is_present])
}

# AOT source items
aot_check <- report_column_group("AOT Source Columns:\n", aot_cols, "AOT", df_cols)
missing_aot <- aot_check$missing
existing_aot <- aot_check$existing

# CRT source items
crt_check <- report_column_group("\nCRT Source Columns:\n", crt_cols, "CRT", df_cols)
missing_crt <- crt_check$missing
existing_crt <- crt_check$existing

# Score (target) columns
target_check <- report_column_group("\nTarget Columns:\n", target_cols, "target", df_cols)
missing_targets <- target_check$missing
existing_targets <- target_check$existing

cat("\n=== END CHECK ===\n\n")

# Abort when more than 4 AOT items, more than 1 CRT item, or more than 1
# target column could not be found.
too_many_missing <- length(missing_aot) > 4 ||
  length(missing_crt) > 1 ||
  length(missing_targets) > 1
if (too_many_missing) {
  stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= PROCESS AOT SCALE =============
# Writes df$aot_total: the per-row mean of the available AOT items, with
# items 4-7 negated for the calculation only.
cat("Processing AOT scale...\n")

# AOT items that are actually present in the data
aot_present <- aot_cols[aot_cols %in% names(df)]

# Convert the AOT item columns to numeric; values that cannot be parsed
# become NA (as.numeric emits a coercion warning for them).
for (col in aot_present) {
  df[[col]] <- as.numeric(df[[col]])
}

# Items 4-7 are the reverse-keyed ones. They are identified by NAME, not by
# their position among the columns that happen to exist: the existence check
# tolerates some missing AOT columns, and a missing column would otherwise
# shift positions and flip the wrong items.
aot_reverse_cols <- intersect(aot_cols[4:7], aot_present)

# Work on a numeric copy so the original item values in df stay untouched.
aot_values <- as.matrix(df[, aot_present, drop = FALSE])
if (length(aot_reverse_cols) > 0) {
  # NOTE(review): negation is only a valid reversal if the items are coded
  # symmetrically around 0 - confirm against the questionnaire coding.
  aot_values[, aot_reverse_cols] <- -aot_values[, aot_reverse_cols, drop = FALSE]
}

# Row-wise mean over the non-missing items (NaN when a row has none).
df$aot_total <- unname(rowMeans(aot_values, na.rm = TRUE))

cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n")
cat(" Original AOT item values preserved in dataframe.\n\n")
# ============= PROCESS CRT SCALES =============
# Writes df$crt_correct and df$crt_int: for each row, the share of answered
# CRT items whose response equals the correct / intuitive answer. Responses
# are compared case-insensitively after trimming whitespace. Empty or NA
# responses are not counted in the denominator; rows with no answered item
# get NA in both columns.
cat("Processing CRT scales...\n")

crt_n_rows <- nrow(df)

# Per-row tallies, accumulated one CRT item (column) at a time
crt_answered_n <- numeric(crt_n_rows)
crt_correct_count <- numeric(crt_n_rows)
crt_int_count <- numeric(crt_n_rows)

for (j in seq_along(crt_cols)) {
  col <- crt_cols[j]
  if (!(col %in% names(df))) {
    next  # item not in the data: contributes to neither count nor denominator
  }
  response <- trimws(tolower(as.character(df[[col]])))
  answered <- !is.na(response) & response != ""
  # `answered &` keeps NA responses from propagating NA into the tallies
  is_correct <- answered & response == tolower(crt_correct_answers[j])
  is_intuitive <- answered & response == tolower(crt_intuitive_answers[j])

  crt_answered_n <- crt_answered_n + answered
  crt_correct_count <- crt_correct_count + is_correct
  crt_int_count <- crt_int_count + is_intuitive
}

# Proportions only where at least one item was answered; NA otherwise
crt_has_response <- crt_answered_n > 0
crt_correct_prop <- rep(NA_real_, crt_n_rows)
crt_int_prop <- rep(NA_real_, crt_n_rows)
crt_correct_prop[crt_has_response] <-
  crt_correct_count[crt_has_response] / crt_answered_n[crt_has_response]
crt_int_prop[crt_has_response] <-
  crt_int_count[crt_has_response] / crt_answered_n[crt_has_response]

df$crt_correct <- crt_correct_prop
df$crt_int <- crt_int_prop

cat(" CRT correct and intuitive scores calculated.\n\n")
cat("=== PROCESSING COMPLETE ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# Picks one random row of the global `df`, recomputes its AOT mean and CRT
# proportions from the source items, and prints them next to the stored
# aot_total / crt_correct / crt_int values with a match flag (tolerance
# 0.0001). Reads the globals df, aot_cols, crt_cols, crt_correct_answers and
# crt_intuitive_answers. It only prints; df is not modified. Returns NULL
# invisibly. Can be run repeatedly to inspect different random rows.
qa_check_random_row <- function() {
  n_rows <- nrow(df)
  if (n_rows == 0) {
    # Nothing to sample from
    cat("\nQA CHECK skipped: the data frame has no rows.\n")
    return(invisible(NULL))
  }

  # Scalar formatting helpers
  show_value <- function(x) if (is.na(x)) "NA" else as.character(x)
  match_flag <- function(expected, actual) {
    # isTRUE() turns an NA comparison into a failed match instead of "NA"
    if (isTRUE(abs(expected - actual) < 0.0001)) "YES ✓" else "NO ✗"
  }

  # Pick a random row
  random_row <- sample.int(n_rows, 1)
  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # AOT Check
  cat("--- AOT SCALE ---\n")
  cat("Source values (original in CSV):\n")
  aot_for_calc <- rep(NA_real_, length(aot_cols))
  for (i in seq_along(aot_cols)) {
    col <- aot_cols[i]
    # Items whose column is absent from df are treated as missing
    val <- if (col %in% names(df)) df[random_row, col] else NA
    if (i %in% 4:7) {
      # Items 4-7 are negated for the calculation
      aot_for_calc[i] <- val * -1
      cat(sprintf(" %s: %s (reversed to %s for calculation)\n",
                  col, show_value(val), show_value(val * -1)))
    } else {
      aot_for_calc[i] <- val
      cat(sprintf(" %s: %s\n", col, show_value(val)))
    }
  }

  # Manual calculation check
  valid_aot <- aot_for_calc[!is.na(aot_for_calc)]
  if (length(valid_aot) > 0) {
    expected_mean <- mean(valid_aot)
    actual_value <- df$aot_total[random_row]
    cat("\nCalculation check:\n")
    cat(sprintf(" Sum of reversed values: %s\n", paste(valid_aot, collapse = " + ")))
    cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean))
    cat(sprintf(" Target value (aot_total): %.5f\n", actual_value))
    cat(sprintf(" Match: %s\n", match_flag(expected_mean, actual_value)))
  } else {
    cat("\n No valid AOT values to calculate.\n")
  }

  # CRT Check
  cat("\n--- CRT SCALE ---\n")
  cat("Source values:\n")
  crt_correct_count <- 0L
  crt_int_count <- 0L
  crt_n <- 0L
  for (i in seq_along(crt_cols)) {
    col <- crt_cols[i]
    # A CRT column absent from df counts as an empty response
    val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
    val_trimmed <- trimws(tolower(val))
    correct_ans <- crt_correct_answers[i]
    intuitive_ans <- crt_intuitive_answers[i]
    # Only non-empty, non-NA responses enter the denominator
    is_answered <- !is.na(val_trimmed) && val_trimmed != ""
    is_correct <- is_answered && val_trimmed == tolower(correct_ans)
    is_intuitive <- is_answered && val_trimmed == tolower(intuitive_ans)
    if (is_answered) {
      crt_n <- crt_n + 1L
      if (is_correct) crt_correct_count <- crt_correct_count + 1L
      if (is_intuitive) crt_int_count <- crt_int_count + 1L
    }
    cat(sprintf(" %s: '%s'\n", col, val))
    cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans,
                if (is_correct) "CORRECT ✓" else "Not correct"))
    cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans,
                if (is_intuitive) "INTUITIVE ✓" else "Not intuitive"))
  }

  cat("\nCalculation check:\n")
  if (crt_n > 0) {
    expected_correct <- crt_correct_count / crt_n
    expected_int <- crt_int_count / crt_n
    actual_correct <- df$crt_correct[random_row]
    actual_int <- df$crt_int[random_row]
    cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct))
    cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct))
    cat(sprintf(" Match: %s\n", match_flag(expected_correct, actual_correct)))
    cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int))
    cat(sprintf(" Target value (crt_int): %.5f\n", actual_int))
    cat(sprintf(" Match: %s\n", match_flag(expected_int, actual_int)))
  } else {
    cat(" No valid CRT responses to calculate.\n")
  }

  cat("\n========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
  invisible(NULL)
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()

# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")

# Save the modified dataframe back to CSV.
# NOTE: this overwrites the input file eohi2.csv in place. Set save_to_file
# to FALSE to review the output above without touching the file.
# na = "" writes NA values as empty cells instead of "NA" text.
save_to_file <- TRUE
if (save_to_file) {
  write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
  cat("\n*** RESULTS WRITTEN TO eohi2.csv ***\n")
  cat("\nProcessing complete! AOT and CRT scales calculated and saved to file.\n")
} else {
  cat("\n*** WRITE TO FILE IS DISABLED ***\n")
  cat("Review the output above, then set save_to_file <- TRUE to save changes.\n")
  cat("\nProcessing complete! AOT and CRT scales calculated (not yet saved to file).\n")
}