eohi/.history/eohi1/correlations - eohi x calibration_20250916091406.r
2025-12-23 15:47:09 -05:00

281 lines
9.0 KiB
R

# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Function to compute descriptive statistics
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Function to add significance stars
corstars <- function(cor_mat, p_mat) {
stars <- ifelse(p_mat < 0.001, "***",
ifelse(p_mat < 0.01, "**",
ifelse(p_mat < 0.05, "*", "")))
# Combine correlation values with stars, rounded to 5 decimal places
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
nrow = nrow(cor_mat))
# Set row and column names
rownames(cor_with_stars) <- rownames(cor_mat)
colnames(cor_with_stars) <- colnames(cor_mat)
return(cor_with_stars)
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Function to compute correlation with bootstrap CI
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
# Remove missing values
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
if(nrow(complete_data) < 3) {
return(data.frame(
correlation = NA,
ci_lower = NA,
ci_upper = NA,
n = nrow(complete_data)
))
}
# Bootstrap function
boot_fun <- function(data, indices) {
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
}
# Perform bootstrap
set.seed(123) # for reproducibility
boot_results <- boot(complete_data, boot_fun, R = R)
# Calculate confidence interval
ci <- boot.ci(boot_results, type = "perc")
return(data.frame(
correlation = boot_results$t0,
ci_lower = ci$perc[4],
ci_upper = ci$perc[5],
n = nrow(complete_data)
))
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")