eohi/.history/eohi1/descriptives - gen knowledge questions_20250918122358.r
2025-12-23 15:47:09 -05:00

55 lines
1.6 KiB
R

library(tidyverse)
# NOTE(review): machine-specific absolute path; the script only runs as-is on
# a machine that has this directory. Kept so that relative file paths used
# below keep resolving exactly as before.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read data. stringsAsFactors = FALSE is spelled out so that, on R < 4.0, text
# columns are not read as factors (as.numeric() on a factor returns its
# internal level codes rather than the printed values).
data <- read.csv("exp1.csv", stringsAsFactors = FALSE)
# Select variables ending exactly with _T or _F. matches() ignores case by
# default, which also matches lowercase names such as demo_f, so the match is
# made case-sensitive here.
df <- data %>% select(matches("(_T|_F)$", ignore.case = FALSE))
# Remove demo_f variable (if present); any_of() makes this a no-op otherwise
df <- df %>% select(-any_of("demo_f"))
# Print the structure of the selected columns for a quick visual check
str(df)
# Coerce every column to numeric; values that cannot be parsed become NA and
# the resulting coercion warnings are suppressed
df_num <- df %>%
  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))
# One row per variable: number of non-missing responses (n_total), number of
# responses equal to 1 (n_correct), and their ratio rounded to 5 decimals
# (prop_correct). A variable with no non-missing responses gets NA for both
# n_correct and prop_correct. Rows are sorted by variable name.
descriptives <- purrr::imap_dfr(df_num, function(col, name) {
  scores <- suppressWarnings(as.numeric(col))
  valid <- scores[!is.na(scores)]
  n_valid <- length(valid)
  if (n_valid == 0) {
    hits <- NA_integer_
    share <- NA_real_
  } else {
    hits <- sum(valid == 1)
    share <- hits / n_valid
  }
  tibble(
    variable = name,
    n_total = n_valid,
    n_correct = hits,
    prop_correct = round(share, 5)
  )
})
descriptives <- arrange(descriptives, variable)
# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per
# bin. Intervals are closed on the left ([lower, upper)), so proportions below
# .10 or equal to 1.00 fall outside every bin; cut() returns NA for them and
# they are tallied in a separate NA row.
#
# Breaks are built as k / 10 rather than seq(0.10, 1.00, by = 0.10): seq()
# accumulates floating-point error (its 0.30 break is 0.30000000000000004), so
# a proportion of exactly 0.30 was placed in the .20-.29 bin.
bin_breaks <- (1:10) / 10
bin_starts <- bin_breaks[-length(bin_breaks)]
bin_levels <- sprintf("%.2f-%.2f", bin_starts, bin_starts + 0.09)
bin_factor <- cut(
  descriptives$prop_correct,
  breaks = bin_breaks,
  right = FALSE,
  include.lowest = FALSE,
  labels = bin_levels
)
# .drop = FALSE keeps bins that contain no variables, reported with a count of
# 0, instead of silently omitting them from the table
bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>%
  group_by(bin, .drop = FALSE) %>%
  summarise(num_variables = n(), .groups = "drop")
# Show results; n = Inf prints every row instead of tibble's truncated preview
descriptives %>% print(n = Inf)
cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n")
bin_counts %>% print(n = Inf)
# Optional export of the per-variable table (disabled by default)
# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")