299 lines
9.6 KiB
R
299 lines
9.6 KiB
R
# Script to compute AOT and CRT scales in eohi2.csv
# AOT: Reverse codes items 4-7, then averages all 8 items
# CRT: Calculates proportion of correct and intuitive responses

# Load necessary library
library(dplyr)

# ---- Scale definitions ----

# Source item columns: aot_1 ... aot_8 and crt_1 ... crt_3
aot_cols <- paste0("aot_", 1:8)
crt_cols <- paste0("crt_", 1:3)

# Columns that hold the computed scores
target_cols <- c("aot_total", "crt_correct", "crt_int")

# Answer keys for the CRT items, in the same order as crt_cols:
# the correct response and the intuitive response for each item
crt_correct_answers <- c("5 cents", "5 minutes", "47 days")
crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days")

# ---- Load data ----

# All relative file paths below are resolved against this directory
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# check.names = FALSE preserves the original column names from the CSV header.
# na.strings = NULL is intended to keep empty cells as empty strings instead
# of converting them to NA.
df <- read.csv(
  "eohi2.csv",
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = NULL
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Column names of the loaded data, with surrounding whitespace stripped
df_cols <- trimws(names(df))

# Print the expected / found / missing counts for one group of columns and,
# when any are missing, list the missing names one per line.
#   heading        - line printed before the counts
#   n_expected     - number of columns the group should contain
#   present        - names from the group that were found in the data
#   absent         - names from the group that were not found in the data
#   absent_heading - line printed before the list of missing names
report_column_group <- function(heading, n_expected, present, absent,
                                absent_heading) {
  cat(heading)
  cat(" Expected:", n_expected, "columns\n")
  cat(" Found:", length(present), "columns\n")
  cat(" Missing:", length(absent), "columns\n")

  if (length(absent) > 0) {
    cat(absent_heading)
    cat(paste0(" - ", absent, " \n"), sep = "")
  }
}

# AOT source columns
aot_in_data <- aot_cols %in% df_cols
missing_aot <- aot_cols[!aot_in_data]
existing_aot <- aot_cols[aot_in_data]
report_column_group(
  "AOT Source Columns:\n", 8, existing_aot, missing_aot,
  "\n Missing AOT columns:\n"
)

# CRT source columns
crt_in_data <- crt_cols %in% df_cols
missing_crt <- crt_cols[!crt_in_data]
existing_crt <- crt_cols[crt_in_data]
report_column_group(
  "\nCRT Source Columns:\n", 3, existing_crt, missing_crt,
  "\n Missing CRT columns:\n"
)

# Target (output) columns
targets_in_data <- target_cols %in% df_cols
missing_targets <- target_cols[!targets_in_data]
existing_targets <- target_cols[targets_in_data]
report_column_group(
  "\nTarget Columns:\n", 3, existing_targets, missing_targets,
  "\n Missing target columns:\n"
)

cat("\n=== END CHECK ===\n\n")

# Abort when more than 4 AOT columns, more than 1 CRT column, or more than
# 1 target column cannot be found; smaller gaps are tolerated.
too_many_missing <- length(missing_aot) > 4 ||
  length(missing_crt) > 1 ||
  length(missing_targets) > 1

if (too_many_missing) {
  stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
# ============= PROCESS AOT SCALE =============
cat("Processing AOT scale...\n")

# AOT item columns that are actually present in the data
aot_present <- aot_cols[aot_cols %in% names(df)]

# Convert the present AOT columns to numeric. Values that cannot be parsed
# as numbers become NA (as.numeric() warns when that happens).
for (col in aot_present) {
  df[[col]] <- as.numeric(df[[col]])
}

# Items 4, 5, 6, 7 are reverse coded for the calculation only.
# They are identified by NAME, taken from their positions in the full
# aot_cols vector. Indexing positions 4:7 of the filtered set of present
# columns would reverse the wrong items whenever an earlier AOT column is
# missing from the data.
aot_reverse_cols <- aot_cols[c(4, 5, 6, 7)]

if (length(aot_present) > 0) {
  # Work on a numeric matrix copy so the item columns in df keep their
  # original (unreversed) values.
  aot_values <- as.matrix(df[, aot_present, drop = FALSE])

  # NOTE(review): reversal is done by sign flip, which presumably relies on
  # the items being scored on a scale centred on zero - confirm.
  is_reversed <- aot_present %in% aot_reverse_cols
  aot_values[, is_reversed] <- aot_values[, is_reversed] * -1

  # Row-wise mean over the available items; missing items are skipped.
  # A row with no valid item yields NaN, as mean(numeric(0)) would.
  df$aot_total <- rowMeans(aot_values, na.rm = TRUE)
} else {
  # No AOT items at all: nothing to average
  df$aot_total <- rep(NA_real_, nrow(df))
}

cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n")
cat(" Original AOT item values preserved in dataframe.\n\n")
# ============= PROCESS CRT SCALES =============
cat("Processing CRT scales...\n")

# Per-row running tallies, filled one CRT item (column) at a time:
#   crt_answered     - number of non-empty responses
#   crt_correct_hits - responses equal to the correct answer
#   crt_int_hits     - responses equal to the intuitive answer
n_rows <- nrow(df)
crt_answered <- numeric(n_rows)
crt_correct_hits <- numeric(n_rows)
crt_int_hits <- numeric(n_rows)

for (j in seq_along(crt_cols)) {
  col <- crt_cols[j]

  # Items whose column is absent from the data contribute nothing
  if (!col %in% names(df)) {
    next
  }

  # Compare case-insensitively and ignore surrounding whitespace
  response <- trimws(tolower(as.character(df[[col]])))

  # Only non-missing, non-empty responses count towards the denominator
  answered <- !is.na(response) & response != ""

  crt_answered <- crt_answered + answered
  crt_correct_hits <- crt_correct_hits +
    (answered & response == tolower(crt_correct_answers[j]))
  crt_int_hits <- crt_int_hits +
    (answered & response == tolower(crt_intuitive_answers[j]))
}

# Proportion of answered items that were correct / intuitive.
# Rows without any answered item stay NA instead of becoming 0/0 = NaN.
crt_correct <- crt_correct_hits / crt_answered
crt_int <- crt_int_hits / crt_answered

no_response <- crt_answered == 0
crt_correct[no_response] <- NA_real_
crt_int[no_response] <- NA_real_

df$crt_correct <- crt_correct
df$crt_int <- crt_int

cat(" CRT correct and intuitive scores calculated.\n\n")

cat("=== PROCESSING COMPLETE ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows

# Recompute the AOT and CRT scores of one row by hand and print them next to
# the stored aot_total, crt_correct and crt_int values.
#
# Reads the script-level objects df, aot_cols, crt_cols, crt_correct_answers
# and crt_intuitive_answers; it only prints and does not modify df.
#
# Arguments:
#   row - optional row number to check (single value between 1 and nrow(df)).
#         When NULL (the default) a row is picked at random.
# Returns:
#   NULL, invisibly.
qa_check_random_row <- function(row = NULL) {
  n_rows <- nrow(df)

  # Nothing to sample from: report it instead of indexing a non-existent row
  if (n_rows == 0) {
    cat("QA CHECK: no rows available to check.\n")
    return(invisible(NULL))
  }

  if (is.null(row)) {
    # Pick a random row
    random_row <- sample.int(n_rows, 1)
  } else {
    stopifnot(length(row) == 1, !is.na(row), row >= 1, row <= n_rows)
    random_row <- as.integer(row)
  }

  # Scalar formatting helpers (plain if/else instead of ifelse() on scalars)
  show_value <- function(x) {
    if (is.na(x)) "NA" else as.character(x)
  }
  # A missing stored value counts as a mismatch rather than printing "NA"
  match_label <- function(expected, actual) {
    if (isTRUE(abs(expected - actual) < 0.0001)) "YES ✓" else "NO ✗"
  }

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # AOT Check
  cat("--- AOT SCALE ---\n")
  cat("Source values (original in CSV):\n")
  aot_for_calc <- rep(NA_real_, length(aot_cols))

  for (i in seq_along(aot_cols)) {
    col <- aot_cols[i]
    val <- if (col %in% names(df)) df[random_row, col] else NA

    # Apply reversal for items 4-7 (position in the full aot_cols vector)
    if (i %in% 4:7) {
      aot_for_calc[i] <- val * -1
      cat(sprintf(
        " %s: %s (reversed to %s for calculation)\n",
        col,
        show_value(val),
        show_value(val * -1)
      ))
    } else {
      aot_for_calc[i] <- val
      cat(sprintf(" %s: %s\n", col, show_value(val)))
    }
  }

  # Manual calculation check
  valid_aot <- aot_for_calc[!is.na(aot_for_calc)]
  if (length(valid_aot) > 0) {
    expected_mean <- mean(valid_aot)
    actual_value <- df$aot_total[random_row]
    cat(sprintf("\nCalculation check:\n"))
    cat(sprintf(" Sum of reversed values: %s\n", paste(valid_aot, collapse = " + ")))
    cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean))
    cat(sprintf(" Target value (aot_total): %.5f\n", actual_value))
    cat(sprintf(" Match: %s\n", match_label(expected_mean, actual_value)))
  } else {
    cat("\n No valid AOT values to calculate.\n")
  }

  # CRT Check
  cat("\n--- CRT SCALE ---\n")
  cat("Source values:\n")
  crt_correct_count <- 0
  crt_int_count <- 0
  crt_n <- 0

  for (i in seq_along(crt_cols)) {
    col <- crt_cols[i]
    val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
    val_trimmed <- trimws(tolower(val))

    correct_ans <- crt_correct_answers[i]
    intuitive_ans <- crt_intuitive_answers[i]

    # isTRUE() turns the NA produced by a missing response into FALSE
    is_correct <- isTRUE(val_trimmed == tolower(correct_ans))
    is_intuitive <- isTRUE(val_trimmed == tolower(intuitive_ans))

    # Only non-missing, non-empty responses count towards the denominator
    if (!is.na(val_trimmed) && val_trimmed != "") {
      crt_n <- crt_n + 1
      if (is_correct) crt_correct_count <- crt_correct_count + 1
      if (is_intuitive) crt_int_count <- crt_int_count + 1
    }

    cat(sprintf(" %s: '%s'\n", col, val))
    cat(sprintf(
      " Correct answer: '%s' -> %s\n",
      correct_ans,
      if (is_correct) "CORRECT ✓" else "Not correct"
    ))
    cat(sprintf(
      " Intuitive answer: '%s' -> %s\n",
      intuitive_ans,
      if (is_intuitive) "INTUITIVE ✓" else "Not intuitive"
    ))
  }

  cat("\nCalculation check:\n")
  if (crt_n > 0) {
    expected_correct <- crt_correct_count / crt_n
    expected_int <- crt_int_count / crt_n
    actual_correct <- df$crt_correct[random_row]
    actual_int <- df$crt_int[random_row]

    cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct))
    cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct))
    cat(sprintf(" Match: %s\n", match_label(expected_correct, actual_correct)))

    cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int))
    cat(sprintf(" Target value (crt_int): %.5f\n", actual_int))
    cat(sprintf(" Match: %s\n", match_label(expected_int, actual_int)))
  } else {
    cat(" No valid CRT responses to calculate.\n")
  }

  cat("\n========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")

  invisible(NULL)
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()

# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")

# Save the modified dataframe back to CSV (this overwrites the input file).
# na = "" writes NA values as empty cells instead of "NA" text.
# To review the output without saving, comment out the write.csv() call below.
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

# Status messages reflect that the write above is active
cat("\n*** RESULTS WRITTEN TO eohi2.csv ***\n")
cat("Review the QA output above to confirm the saved values.\n")
cat("\nProcessing complete! AOT and CRT scales calculated and saved to file.\n")