# eohi/.history/eohi1/descriptives - gen knowledge questions_20250918120727.r

library(tidyverse)
library(boot)

# Work from the project folder so the relative data path below resolves.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the raw experiment export.
data <- read.csv("exp1.csv")

# Keep the columns whose names end in _T or _F. matches() ignores case
# unless told otherwise, so demo_f is also picked up when it exists and is
# dropped again in the second step.
df <- data %>%
  select(matches("(_T|_F)$")) %>%
  select(-any_of("demo_f"))

# Quick structural check of the selected columns.
str(df)

# Convert every selected column to numeric. Entries that cannot be parsed
# become NA; the coercion warnings this produces are silenced.
df_num <- mutate(
  df,
  across(everything(), function(col) suppressWarnings(as.numeric(col)))
)

# Helper: bootstrap BCa CI for the proportion of responses equal to 1.
#
# Arguments:
#   x     Vector of responses. Coerced to numeric; factors are converted
#         through their labels rather than their internal integer codes.
#         NAs and entries that cannot be parsed as numbers are dropped.
#   R     Number of bootstrap replicates passed to boot::boot().
#   conf  Confidence level passed to boot::boot.ci().
#
# Returns a list with:
#   n_total    number of non-missing responses
#   n_correct  number of responses equal to 1 (NA when n_total is 0)
#   prop       n_correct / n_total (NA when n_total is 0)
#   ci_lower,
#   ci_upper   BCa interval bounds, or NA when boot.ci() yields no interval
compute_prop_ci <- function(x, R = 1000, conf = 0.95) {
  # as.numeric() on a factor returns the level codes, which would count the
  # first level as "1" regardless of its label; go through the labels.
  if (is.factor(x)) {
    x <- as.character(x)
  }
  x <- suppressWarnings(as.numeric(x))
  x <- x[!is.na(x)]
  n_total <- length(x)
  if (n_total == 0) {
    return(list(
      n_total = 0L,
      n_correct = NA_integer_,
      prop = NA_real_,
      ci_lower = NA_real_,
      ci_upper = NA_real_
    ))
  }

  # Any value other than exactly 1 is scored as incorrect.
  is_correct <- x == 1
  n_correct <- sum(is_correct)
  prop <- n_correct / n_total

  stat <- function(data, indices) mean(data[indices])
  b <- boot::boot(data = as.numeric(is_correct), statistic = stat, R = R)

  # boot.ci() can fail or return NULL (e.g. when every replicate is equal).
  # Report failures as a warning and fall back to NA bounds instead of
  # aborting the whole run.
  ci <- tryCatch(
    boot::boot.ci(b, conf = conf, type = "bca"),
    error = function(e) {
      warning(
        "BCa interval could not be computed: ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )

  if (is.null(ci) || is.null(ci$bca)) {
    lower <- NA_real_
    upper <- NA_real_
  } else {
    # Positions 4 and 5 of the bca row hold the interval endpoints.
    lower <- ci$bca[4]
    upper <- ci$bca[5]
  }

  list(
    n_total = n_total,
    n_correct = n_correct,
    prop = prop,
    ci_lower = lower,
    ci_upper = upper
  )
}

# One summary row per column of df_num: number of usable responses, number
# and proportion scored correct, and the 95% BCa bootstrap interval.
# Proportion and interval bounds are rounded to 5 decimals, and the rows
# are sorted by variable name.
descriptives <- df_num %>%
  purrr::imap(function(values, var_name) {
    stats <- compute_prop_ci(values, R = 1000, conf = 0.95)
    tibble(
      variable = var_name,
      n_total = stats$n_total,
      n_correct = stats$n_correct,
      prop_correct = round(stats$prop, 5),
      ci_lower = round(stats$ci_lower, 5),
      ci_upper = round(stats$ci_upper, 5)
    )
  }) %>%
  dplyr::bind_rows() %>%
  arrange(variable)

# Show every row of the summary table.
print(descriptives, n = Inf)

# Optionally save
# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")