# File viewer metadata: 88 lines, 2.8 KiB, R
library(tidyverse)

# NOTE(review): hard-coded, machine-specific working directory. It is kept so
# that the relative paths used in this script resolve exactly as before;
# consider running the script from the project directory instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read data
data <- read.csv("exp1.csv")

# Select variables ending exactly with _T or _F.
# tidyselect::matches() ignores case by default, which would also pull in
# columns ending in lowercase "_t" / "_f" (such as demo_f). Passing
# ignore.case = FALSE makes the match case-sensitive, as the rule states.
# demo_f is still dropped explicitly (a no-op when absent) as a safeguard.
df <- data %>%
  select(matches("(_T|_F)$", ignore.case = FALSE)) %>%
  select(-any_of("demo_f"))

# Show the structure of the selected columns as a quick sanity check
str(df)
# Convert every selected column to numeric. Values that cannot be parsed as a
# number become NA; the coercion warnings this would raise are suppressed.
df_num <- mutate(
  df,
  across(
    everything(),
    function(values) suppressWarnings(as.numeric(values))
  )
)
# Compute count and proportion correct per variable.
#
# One row is produced per column of df_num:
#   variable         - column name
#   n_total          - number of non-missing responses
#   n_correct        - number of responses equal to 1 (NA when n_total is 0)
#   prop_correct     - n_correct / n_total, rounded to 5 decimals
#   match_difficulty - "YES" / "NO" when the unrounded proportion lies inside /
#                      outside the band expected for the difficulty number in
#                      the column name; "UNKNOWN" when the name carries no
#                      recognised difficulty number; NA when the column has no
#                      valid responses to evaluate.
# Rows are sorted by variable name.
descriptives <- purrr::imap_dfr(df_num, function(col, name) {
  # Expected [lower, upper] proportion-correct band per difficulty number
  expected_ranges <- list(
    "15" = c(0.15, 0.25),
    "35" = c(0.35, 0.45),
    "55" = c(0.55, 0.65),
    "75" = c(0.75, 0.85)
  )
  # Column names are expected to end in _<digits>_T or _<digits>_F
  difficulty_pattern <- ".*_([0-9]+)_[TF]$"

  # df_num columns are already numeric, so only missing values are dropped
  x <- col[!is.na(col)]
  n_total <- length(x)
  n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1)
  prop <- if (n_total == 0) NA_real_ else n_correct / n_total

  # Extract the difficulty number only when the name has the expected shape;
  # feeding a non-matching name to as.numeric() would just raise a coercion
  # warning. as.numeric() is kept to normalise the digits (e.g. "015" -> "15").
  difficulty_key <- if (grepl(difficulty_pattern, name)) {
    as.character(as.numeric(sub(difficulty_pattern, "\\1", name)))
  } else {
    NA_character_
  }

  match_difficulty <- if (!difficulty_key %in% names(expected_ranges)) {
    "UNKNOWN"
  } else if (is.na(prop)) {
    # No valid responses: the range check cannot be evaluated, and
    # if (NA) would stop the whole script with an error.
    NA_character_
  } else {
    expected_range <- expected_ranges[[difficulty_key]]
    if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO"
  }

  tibble(
    variable = name,
    n_total = n_total,
    n_correct = n_correct,
    prop_correct = round(prop, 5),
    match_difficulty = match_difficulty
  )
}) %>%
  arrange(variable)
# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin.
# Bins are left-closed: [0.10, 0.20), [0.20, 0.30), ..., [0.90, 1.00).
# Proportions below 0.10, equal to 1, or missing fall outside every bin and are
# counted under an NA bin; bins that contain no variables are not listed.
#
# Boundaries are built as k / 10 rather than seq(0.10, 1.00, by = 0.10):
# seq() accumulates floating-point error (0.1 + 2 * 0.1 is slightly greater
# than 0.3), which would push a proportion of exactly 0.30 into the
# 0.20-0.29 bin.
bin_levels <- sprintf("%.2f-%.2f", (1:9) / 10, (1:9) / 10 + 0.09)
bin_factor <- cut(
  descriptives$prop_correct,
  breaks = (1:10) / 10,
  right = FALSE,
  include.lowest = FALSE,
  labels = bin_levels
)
# cut() already returns a factor whose levels are bin_levels
bin_counts <- tibble(bin = bin_factor) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")
# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94
# Bins are left-closed: [0.15, 0.25), [0.25, 0.35), ..., [0.85, 0.95).
# Proportions below 0.15, at or above 0.95, or missing are counted under an NA
# bin; bins that contain no variables are not listed.
#
# Boundaries are derived from integer percentages so each one is the closest
# double to its decimal value; seq(0.15, 0.95, by = 0.10) accumulates
# floating-point error and may misplace a proportion that sits exactly on a
# boundary.
bin15_levels <- sprintf(
  "%.2f-%.2f",
  seq(15, 85, by = 10) / 100,
  seq(24, 94, by = 10) / 100
)
bin15_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(15, 95, by = 10) / 100,
  right = FALSE,
  include.lowest = FALSE,
  labels = bin15_levels
)
# cut() already returns a factor whose levels are bin15_levels
bin15_counts <- tibble(bin = bin15_factor) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")
# View: the full per-variable table first (all rows), then each binned summary
# under its own heading.
print(descriptives, n = Inf)

cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n")
print(bin_counts, n = Inf)

cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n")
print(bin15_counts, n = Inf)
# Optionally save
# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")