eohi/eohi1/e1 - reliability ehi.r

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

options(scipen = 999)

df <- read.csv("ehi1.csv")
library(psych)
library(knitr)

# fixed-decimal formatter (five decimals)
fmt5 <- function(x) formatC(x, format = "f", digits = 5)

# Select the 4 variables for reliability analysis
reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]),
                      c("eohiDGEN_mean", "ehi_global_mean")]

# Cronbach's Alpha
alpha_result <- alpha(reliability_data)

# Split-half reliability
split_half <- splitHalf(reliability_data)

# Inter-item correlations
cor_matrix <- cor(reliability_data, use = "complete.obs")

# Two-item reliability summary (if applicable)
two_item_section <- ""
if (ncol(reliability_data) == 2) {
  n_complete <- sum(complete.cases(reliability_data))
  r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson")
  r_spearman <- suppressWarnings(cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "spearman"))
  # Fisher z CI for r
  fisher_z <- atanh(r_pearson)
  se_z <- 1 / sqrt(n_complete - 3)
  z_crit <- qnorm(0.975)
  ci_z_lower <- fisher_z - z_crit * se_z
  ci_z_upper <- fisher_z + z_crit * se_z
  ci_r_lower <- tanh(ci_z_lower)
  ci_r_upper <- tanh(ci_z_upper)
  # Approximate Fisher z CI for Spearman rho (large-sample approximation)
  fisher_z_s <- atanh(r_spearman)
  ci_zs_lower <- fisher_z_s - z_crit * se_z
  ci_zs_upper <- fisher_z_s + z_crit * se_z
  ci_s_lower <- tanh(ci_zs_lower)
  ci_s_upper <- tanh(ci_zs_upper)
  # Spearman–Brown/Cronbach's alpha for k = 2
  alpha_sb <- (2 * r_pearson) / (1 + r_pearson)
  alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower)
  alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper)

  two_item_section <- paste0(
    "<h2>Two-Item Reliability Summary</h2>",
    "<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>",
    "<p>Spearman r: ", fmt5(r_spearman), " (95% CI: [", fmt5(ci_s_lower), ", ", fmt5(ci_s_upper), "])</p>",
    "<p>Spearman–Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb),
    " (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>"
  )
}

# Create a summary table
summary_table <- data.frame(
  Variable = names(reliability_data),
  n = nrow(reliability_data),
  Mean = round(colMeans(reliability_data, na.rm = TRUE), 5),
  SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5),
  Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5),
  Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5),
  Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5)
)

html_output <- paste0(
  "<html><head><title>EHI Reliability Analysis</title></head><body>",
  "<h1>EHI Reliability Analysis</h1>",
  two_item_section,

  "<h2>Cronbach's Alpha</h2>",
  kable(alpha_result$total, format = "html"),

  "<h2>Split-Half Reliability</h2>",
  "<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>",

  "<h2>Item-Level Statistics</h2>",
  kable(alpha_result$item.stats, format = "html"),


  "</body></html>"
)

writeLines(html_output, "EHI reliability.html")