# Script to compute AOT and CRT scales in eohi2.csv
# AOT: reverse codes items 4-7, then averages the available items (up to 8)
# CRT: calculates proportion of correct and intuitive responses among the
#      CRT items a respondent actually answered

# Load necessary library
library(dplyr)

# NOTE(review): hard-coded, machine-specific working directory. Kept so the
# script keeps working for its author; consider running from the project root.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE,
               na.strings = NULL)

if (nrow(df) == 0) {
  stop("ERROR: eohi2.csv contains no data rows.")
}

# Define source columns
aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4",
              "aot_5", "aot_6", "aot_7", "aot_8")
crt_cols <- c("crt_1", "crt_2", "crt_3")

# AOT items that are reverse coded. Identified by NAME so that a missing
# column can never shift the reversal onto the wrong item.
aot_reverse_cols <- c("aot_4", "aot_5", "aot_6", "aot_7")

# Define target columns
target_cols <- c("aot_total", "crt_correct", "crt_int")

# Define correct and intuitive CRT answers
crt_correct_answers <- c("5 cents", "5 minutes", "47 days")
crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days")

# Set to FALSE to review the output without overwriting eohi2.csv.
# TRUE matches what the script has always done (the write was never disabled).
save_to_file <- TRUE

# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============

# Prints found/missing counts for one group of expected columns and returns
# list(missing = <names not in `available`>, existing = <names in `available`>).
report_columns <- function(heading, missing_heading, expected, available) {
  is_present <- expected %in% available
  missing <- expected[!is_present]
  existing <- expected[is_present]

  cat(heading)
  cat(" Expected:", length(expected), "columns\n")
  cat(" Found:", length(existing), "columns\n")
  cat(" Missing:", length(missing), "columns\n")

  if (length(missing) > 0) {
    cat(missing_heading)
    for (col in missing) {
      cat(" -", col, "\n")
    }
  }

  list(missing = missing, existing = existing)
}

cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))

# The check below uses trimmed names, but processing looks columns up by their
# exact names. Warn about headers that only match after trimming, because
# those columns are reported as found and then skipped.
source_cols <- c(aot_cols, crt_cols)
padded_cols <- source_cols[source_cols %in% df_cols &
                             !source_cols %in% names(df)]
if (length(padded_cols) > 0) {
  warning("These columns have leading/trailing whitespace in the CSV header ",
          "and will be skipped during processing: ",
          paste(padded_cols, collapse = ", "), call. = FALSE)
}

# Check AOT columns
aot_check <- report_columns("AOT Source Columns:\n",
                            "\n Missing AOT columns:\n",
                            aot_cols, df_cols)
missing_aot <- aot_check$missing
existing_aot <- aot_check$existing

# Check CRT columns
crt_check <- report_columns("\nCRT Source Columns:\n",
                            "\n Missing CRT columns:\n",
                            crt_cols, df_cols)
missing_crt <- crt_check$missing
existing_crt <- crt_check$existing

# Check target columns
target_check <- report_columns("\nTarget Columns:\n",
                               "\n Missing target columns:\n",
                               target_cols, df_cols)
missing_targets <- target_check$missing
existing_targets <- target_check$existing

cat("\n=== END CHECK ===\n\n")

# Stop if critical columns are missing
if (length(missing_aot) > 4 || length(missing_crt) > 1 ||
      length(missing_targets) > 1) {
  stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")

# ============= PROCESS AOT SCALE =============
cat("Processing AOT scale...\n")

aot_present <- aot_cols[aot_cols %in% names(df)]

# Convert AOT columns to numeric (non-numeric values become NA with a warning)
for (col in aot_present) {
  df[[col]] <- as.numeric(df[[col]])
}

# Mean of the non-missing values; NA (not NaN) when every value is missing.
row_mean_or_na <- function(values) {
  if (all(is.na(values))) {
    return(NA_real_)
  }
  mean(values, na.rm = TRUE)
}

# Calculate average with reverse coding (WITHOUT modifying original values).
# Work on a numeric matrix copy so the item columns in df are left untouched.
# NOTE(review): reversal is a sign flip (x * -1), which is only a valid
# reverse-code for a response scale centred on 0 - confirm the AOT coding.
aot_values <- as.matrix(df[, aot_present, drop = FALSE])
reverse_mask <- aot_present %in% aot_reverse_cols
aot_values[, reverse_mask] <- aot_values[, reverse_mask] * -1

df$aot_total <- unname(apply(aot_values, 1, row_mean_or_na))

cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n")
cat(" Original AOT item values preserved in dataframe.\n\n")

# ============= PROCESS CRT SCALES =============
cat("Processing CRT scales...\n")

# Proportion of answered items whose response equals the keyed answer.
#   responses:  character matrix (rows = respondents, cols = CRT items),
#               already lower-cased and trimmed
#   answered:   logical matrix, TRUE where a non-empty response was given
#   answer_key: one keyed answer per column of `responses`
# Returns one value per row; NA when the respondent answered no item.
crt_proportion <- function(responses, answered, answer_key) {
  key <- matrix(tolower(answer_key), nrow = nrow(responses),
                ncol = ncol(responses), byrow = TRUE)
  hits <- answered & responses == key
  n_answered <- rowSums(answered)
  ifelse(n_answered > 0, rowSums(hits) / n_answered, NA_real_)
}

crt_is_present <- crt_cols %in% names(df)
crt_responses <- do.call(
  cbind,
  lapply(crt_cols[crt_is_present], function(col) {
    trimws(tolower(as.character(df[[col]])))
  })
)
crt_answered <- !is.na(crt_responses) & crt_responses != ""

df$crt_correct <- crt_proportion(crt_responses, crt_answered,
                                 crt_correct_answers[crt_is_present])
df$crt_int <- crt_proportion(crt_responses, crt_answered,
                             crt_intuitive_answers[crt_is_present])

cat(" CRT correct and intuitive scores calculated.\n\n")

cat("=== PROCESSING COMPLETE ===\n\n")

# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# Recomputes the AOT and CRT scores of one row by hand and compares them with
# the stored target values. Can be run multiple times to check different rows.
#   row_index: row of df to check; NULL (default) picks a row at random.
qa_check_random_row <- function(row_index = NULL) {
  if (is.null(row_index)) {
    random_row <- sample(seq_len(nrow(df)), 1)
  } else {
    stopifnot(length(row_index) == 1, row_index >= 1, row_index <= nrow(df))
    random_row <- row_index
  }

  # TRUE only when both numbers are present and agree within tolerance
  values_match <- function(expected, actual) {
    isTRUE(abs(expected - actual) < 0.0001)
  }
  match_label <- function(expected, actual) {
    if (values_match(expected, actual)) "YES ✓" else "NO ✗"
  }

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # AOT Check
  cat("--- AOT SCALE ---\n")
  cat("Source values (original in CSV):\n")

  aot_for_calc <- rep(NA_real_, length(aot_cols))

  for (i in seq_along(aot_cols)) {
    col <- aot_cols[i]
    val <- if (col %in% names(df)) df[random_row, col] else NA
    shown <- if (is.na(val)) "NA" else as.character(val)

    # Apply reversal for items 4-7
    if (col %in% aot_reverse_cols) {
      aot_for_calc[i] <- val * -1
      shown_reversed <- if (is.na(val)) "NA" else as.character(val * -1)
      cat(sprintf(" %s: %s (reversed to %s for calculation)\n",
                  col, shown, shown_reversed))
    } else {
      aot_for_calc[i] <- val
      cat(sprintf(" %s: %s\n", col, shown))
    }
  }

  # Manual calculation check
  valid_aot <- aot_for_calc[!is.na(aot_for_calc)]
  if (length(valid_aot) > 0) {
    expected_mean <- mean(valid_aot)
    actual_value <- df$aot_total[random_row]
    cat(sprintf("\nCalculation check:\n"))
    cat(sprintf(" Sum of reversed values: %s\n",
                paste(valid_aot, collapse = " + ")))
    cat(sprintf(" Average of %d valid items: %.5f\n",
                length(valid_aot), expected_mean))
    cat(sprintf(" Target value (aot_total): %.5f\n", actual_value))
    cat(sprintf(" Match: %s\n", match_label(expected_mean, actual_value)))
  } else {
    cat("\n No valid AOT values to calculate.\n")
  }

  # CRT Check
  cat("\n--- CRT SCALE ---\n")
  cat("Source values:\n")

  crt_correct_count <- 0
  crt_int_count <- 0
  crt_n <- 0

  for (i in seq_along(crt_cols)) {
    col <- crt_cols[i]
    val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
    val_trimmed <- trimws(tolower(val))

    correct_ans <- crt_correct_answers[i]
    intuitive_ans <- crt_intuitive_answers[i]

    # A missing or empty response is neither correct nor intuitive
    is_answered <- !is.na(val_trimmed) && val_trimmed != ""
    is_correct <- is_answered && val_trimmed == tolower(correct_ans)
    is_intuitive <- is_answered && val_trimmed == tolower(intuitive_ans)

    if (is_answered) {
      crt_n <- crt_n + 1
      if (is_correct) crt_correct_count <- crt_correct_count + 1
      if (is_intuitive) crt_int_count <- crt_int_count + 1
    }

    cat(sprintf(" %s: '%s'\n", col, val))
    cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans,
                if (is_correct) "CORRECT ✓" else "Not correct"))
    cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans,
                if (is_intuitive) "INTUITIVE ✓" else "Not intuitive"))
  }

  cat("\nCalculation check:\n")
  if (crt_n > 0) {
    expected_correct <- crt_correct_count / crt_n
    expected_int <- crt_int_count / crt_n
    actual_correct <- df$crt_correct[random_row]
    actual_int <- df$crt_int[random_row]

    cat(sprintf(" Correct: %d out of %d = %.5f\n",
                crt_correct_count, crt_n, expected_correct))
    cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct))
    cat(sprintf(" Match: %s\n", match_label(expected_correct, actual_correct)))

    cat(sprintf("\n Intuitive: %d out of %d = %.5f\n",
                crt_int_count, crt_n, expected_int))
    cat(sprintf(" Target value (crt_int): %.5f\n", actual_int))
    cat(sprintf(" Match: %s\n", match_label(expected_int, actual_int)))
  } else {
    cat(" No valid CRT responses to calculate.\n")
  }

  cat("\n========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")

  invisible(random_row)
}

# Run QA check on first random row
cat("\n\n")
qa_check_random_row()

# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")

# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# The messages below report what actually happened, so the console output can
# no longer claim the file is untouched after it has been overwritten.
if (save_to_file) {
  write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
  cat("\n*** RESULTS WRITTEN TO eohi2.csv ***\n")
  cat("\nProcessing complete! AOT and CRT scales calculated and saved to file.\n")
} else {
  cat("\n*** WRITE TO FILE IS DISABLED ***\n")
  cat("Review the output above, then set save_to_file <- TRUE to save changes.\n")
  cat("\nProcessing complete! AOT and CRT scales calculated (not yet saved to file).\n")
}