eohi/eohi1/correlations - brier score x eohi and cal.r
2026-01-22 17:55:35 -05:00

81 lines
2.9 KiB
R

# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Keep only required columns for the analysis
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Variables
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
results <- purrr::pmap_dfr(grid, function(x, y) {
xv <- df[[x]]; yv <- df[[y]]
ok <- is.finite(xv) & is.finite(yv)
if (sum(ok) < 3) {
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
}
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
})
dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
# Wide r-only tables (optional)
to_wide <- function(d) {
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)
# Export a single CSV combining all sets
corr_bs_eohi$group <- "BS_vs_EOHI"
corr_bs_cal$group <- "BS_vs_Cal"
corr_cal_eohi$group <- "Cal_vs_EOHI"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)