eohi/eohi1/correlations - brier score x eohi and cal.r

# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)

options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")

# Keep only required columns for the analysis
bs_vars   <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars  <- c("cal_selfActual","cal_global")

df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))

# --- Brier score correlations vs EOHIs and Calibration ---

# Variables
bs_vars   <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars  <- c("cal_selfActual","cal_global")

# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}

# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal  <- corr_tidy(df1, bs_vars, cal_vars,  method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")

# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal  <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)

# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)

# Export a single CSV combining all sets
corr_bs_eohi$group   <- "BS_vs_EOHI"
corr_bs_cal$group    <- "BS_vs_Cal"
corr_cal_eohi$group  <- "Cal_vs_EOHI"

corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)