eohi/.history/eohi1/descriptives - gen knowledge questions_20250918120727.r
2025-12-23 15:47:09 -05:00

63 lines
1.9 KiB
R

library(tidyverse)
library(boot)
# NOTE(review): hard-coded, machine-specific working directory; the relative
# path "exp1.csv" below is resolved against it.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read data
data <- read.csv("exp1.csv")

# Select variables ending exactly with _T or _F. tidyselect's matches() is
# case-insensitive unless told otherwise, which is how lowercase names such as
# "demo_f" were being picked up; ignore.case = FALSE enforces the stated intent.
df <- data %>% select(matches("(_T|_F)$", ignore.case = FALSE))

# Defensive: drop demo_f if it is somehow still present
df <- df %>% select(-any_of("demo_f"))
str(df)

# Coerce every selected column to numeric. Values that cannot be parsed as
# numbers become NA; the resulting coercion warnings are deliberately silenced.
df_num <- df %>%
  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))
#' Proportion correct with a bootstrap BCa confidence interval
#'
#' Values are coerced to numeric and missing values are dropped. A response
#' counts as correct when it equals 1; every other non-missing value counts as
#' incorrect.
#'
#' @param x Vector of responses (anything `as.numeric()` can coerce).
#' @param R Number of bootstrap replicates passed to `boot::boot()`.
#' @param conf Confidence level passed to `boot::boot.ci()`.
#' @return A list with `n_total` (number of non-missing responses),
#'   `n_correct` (integer count of responses equal to 1), `prop`
#'   (`n_correct / n_total`), and `ci_lower` / `ci_upper` (BCa bounds). With no
#'   usable responses, `n_total` is `0L` and the other elements are `NA`. The
#'   bounds are `NA` whenever `boot.ci()` errors or yields no BCa interval.
compute_prop_ci <- function(x, R = 1000, conf = 0.95) {
  x <- suppressWarnings(as.numeric(x))
  x <- x[!is.na(x)]
  n_total <- length(x)
  if (n_total == 0) {
    return(list(
      n_total = 0L,
      n_correct = NA_integer_,
      prop = NA_real_,
      ci_lower = NA_real_,
      ci_upper = NA_real_
    ))
  }

  # Count on the logical vector so n_correct is an integer, matching the
  # NA_integer_ used on the empty-input path above.
  is_correct <- x == 1
  n_correct <- sum(is_correct)
  prop <- n_correct / n_total

  # Bootstrap the mean of the 0/1 indicator, i.e. the proportion correct
  x01 <- as.numeric(is_correct)
  stat <- function(data, indices) mean(data[indices])
  b <- boot::boot(data = x01, statistic = stat, R = R)

  # Best effort: a failing interval computation yields NA bounds rather than
  # aborting the whole per-variable summary.
  # NOTE(review): presumably this covers degenerate columns (all responses
  # identical) -- confirm against boot.ci() behavior.
  ci <- tryCatch(
    boot::boot.ci(b, conf = conf, type = "bca"),
    error = function(e) NULL
  )
  if (is.null(ci) || is.null(ci$bca)) {
    lower <- NA_real_
    upper <- NA_real_
  } else {
    # Elements 4 and 5 of the bca entry are read as the lower/upper endpoints
    lower <- ci$bca[4]
    upper <- ci$bca[5]
  }

  list(
    n_total = n_total,
    n_correct = n_correct,
    prop = prop,
    ci_lower = lower,
    ci_upper = upper
  )
}
# Build one summary row per response variable: number of usable responses,
# number and proportion correct, and the 95% BCa bootstrap interval.
# Proportions and interval bounds are rounded to 5 decimals; rows are then
# stacked and sorted by variable name.
descriptives <- df_num %>%
  purrr::imap(function(values, item) {
    stats <- compute_prop_ci(values, R = 1000, conf = 0.95)
    tibble(
      variable = item,
      n_total = stats[["n_total"]],
      n_correct = stats[["n_correct"]],
      prop_correct = round(stats[["prop"]], 5),
      ci_lower = round(stats[["ci_lower"]], 5),
      ci_upper = round(stats[["ci_upper"]], 5)
    )
  }) %>%
  dplyr::bind_rows() %>%
  arrange(variable)

# Show every row of the summary table
print(descriptives, n = Inf)

# Optional: write the table to disk
# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")