# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)

# Print numbers in fixed notation rather than scientific notation
options(scipen = 999)

# NOTE(review): hard-coded, machine-specific working directory. Kept so the
# relative input/output paths below keep resolving as before; consider running
# the script from the project directory instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load data
df1 <- read.csv("exp1.csv")

# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))

# Select variables of interest
eohi_vars <- c(
  "eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life",
  "eohiDGEN_mean"
)
cal_vars <- c(
  "cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75",
  "cal_true", "cal_false"
)

# A variable of interest may be absent from the file, or may have just been
# dropped above because it was entirely NA. Fail with an explicit message
# instead of the opaque "undefined columns selected" error.
missing_vars <- setdiff(c(eohi_vars, cal_vars), names(df1))
if (length(missing_vars) > 0) {
  stop(
    "Variables not available in exp1.csv (missing or all NA): ",
    paste(missing_vars, collapse = ", "),
    call. = FALSE
  )
}

# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]

# Ensure all selected variables are numeric
df <- df %>% mutate(across(everything(), as.numeric))

# Remove rows with any missing values for correlation analysis
df_complete <- df[complete.cases(df), ]

cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")

str(df)
summary(df)

####==== DESCRIPTIVE STATISTICS ====

# Compute descriptive statistics for a set of columns.
#
# data: data frame holding the columns.
# vars: character vector of column names to describe.
#
# Returns one row per variable with the columns var, n (non-missing count),
# mean, sd, min, max, median, q25 and q75; all except n are rounded to 5
# decimal places.
get_descriptives <- function(data, vars) {
  data %>%
    select(all_of(vars)) %>%
    summarise(across(everything(), list(
      n = ~sum(!is.na(.)),
      mean = ~mean(., na.rm = TRUE),
      sd = ~sd(., na.rm = TRUE),
      min = ~min(., na.rm = TRUE),
      max = ~max(., na.rm = TRUE),
      median = ~median(., na.rm = TRUE),
      q25 = ~quantile(., 0.25, na.rm = TRUE),
      q75 = ~quantile(., 0.75, na.rm = TRUE)
    ))) %>%
    pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
    # across() names results "<column>_<stat>"; split on the LAST underscore
    # only, because the variable names themselves contain underscores.
    separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
    pivot_wider(names_from = stat, values_from = value) %>%
    mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
}

# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)

# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)

####==== PEARSON CORRELATIONS ====

# Compute correlation matrix with p-values
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")

# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r

# Extract p-values
p_matrix_pearson <- cor_results_pearson$P

# Map p-values to significance stars:
# "***" for p < 0.001, "**" for p < 0.01, "*" for p < 0.05, "" otherwise.
# Missing p-values map to "" so that no literal "NA" ends up in the output.
# Works on scalars, vectors and matrices; returns a plain character vector.
sig_stars <- function(p) {
  stars <- rep("", length(p))
  known <- !is.na(p)
  stars[known & p < 0.05] <- "*"
  stars[known & p < 0.01] <- "**"
  stars[known & p < 0.001] <- "***"
  stars
}

# Combine a correlation matrix with significance stars.
#
# cor_mat: numeric matrix of correlation coefficients.
# p_mat:   numeric matrix of p-values with the same dimensions as cor_mat.
#
# Returns a character matrix with the dimnames of cor_mat in which every cell
# is the coefficient rounded to 5 decimal places followed by its stars.
# rcorr() reports NA p-values on the diagonal; those cells get no stars.
corstars <- function(cor_mat, p_mat) {
  formatted <- format(round(cor_mat, 5), nsmall = 5)
  matrix(
    paste0(formatted, sig_stars(p_mat)),
    nrow = nrow(cor_mat),
    dimnames = dimnames(cor_mat)
  )
}

# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)

cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)

# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]

cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
# seq_len() keeps the loops empty (instead of iterating over c(1, 0)) if a
# dimension is ever zero.
for (i in seq_len(nrow(eohi_cal_correlations))) {
  for (j in seq_len(ncol(eohi_cal_correlations))) {
    cor_val <- eohi_cal_correlations[i, j]
    p_val <- eohi_cal_pvalues[i, j]
    cat(sprintf(
      "%s x %s: r = %.5f%s, p = %.5f\n",
      rownames(eohi_cal_correlations)[i],
      colnames(eohi_cal_correlations)[j],
      cor_val, sig_stars(p_val), p_val
    ))
  }
}

####==== SPEARMAN CORRELATIONS ====

# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")

# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r

# Extract p-values
p_matrix_spearman <- cor_results_spearman$P

# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)

cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)

# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]

cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
# seq_len() keeps the loops empty (instead of iterating over c(1, 0)) if a
# dimension is ever zero.
for (i in seq_len(nrow(eohi_cal_correlations_spearman))) {
  for (j in seq_len(ncol(eohi_cal_correlations_spearman))) {
    cor_val <- eohi_cal_correlations_spearman[i, j]
    p_val <- eohi_cal_pvalues_spearman[i, j]
    # Scalar if/else rather than ifelse(); a missing p-value gets no stars
    # instead of printing the literal "NA".
    star <- if (is.na(p_val)) {
      ""
    } else if (p_val < 0.001) {
      "***"
    } else if (p_val < 0.01) {
      "**"
    } else if (p_val < 0.05) {
      "*"
    } else {
      ""
    }
    cat(sprintf(
      "%s x %s: rho = %.5f%s, p = %.5f\n",
      rownames(eohi_cal_correlations_spearman)[i],
      colnames(eohi_cal_correlations_spearman)[j],
      cor_val, star, p_val
    ))
  }
}

####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====

# Correlation between two columns with a percentile bootstrap CI.
#
# data:   data frame containing both columns.
# var1:   name of the first column (string).
# var2:   name of the second column (string).
# method: correlation method passed on to cor() (default "pearson").
# R:      number of bootstrap replicates (default 1000).
#
# Returns a one-row data frame with the columns correlation, ci_lower,
# ci_upper and n (rows complete on both columns). The first three are NA when
# fewer than 3 complete rows exist, when either column is constant (the
# correlation is undefined), or when boot.ci() cannot produce an interval.
#
# Side effect: calls set.seed(123) on every invocation, so results are
# reproducible but the global RNG state is reset each time.
bootstrap_correlation <- function(data, var1, var2, method = "pearson",
                                  R = 1000) {
  # Remove rows with a missing value in either variable
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  n_complete <- nrow(complete_data)

  # Typed NAs so the result binds cleanly with numeric rows
  na_result <- data.frame(
    correlation = NA_real_,
    ci_lower = NA_real_,
    ci_upper = NA_real_,
    n = n_complete
  )

  if (n_complete < 3) {
    return(na_result)
  }

  # A constant column has zero variance, so the correlation is undefined
  is_constant <- function(x) length(unique(x)) < 2
  if (is_constant(complete_data[[var1]]) ||
      is_constant(complete_data[[var2]])) {
    return(na_result)
  }

  # Statistic evaluated on each bootstrap resample
  boot_fun <- function(d, idx) {
    cor(d[[var1]][idx], d[[var2]][idx], method = method, use = "complete.obs")
  }

  # Perform bootstrap
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)

  # Percentile CI; boot.ci() returns NULL when all replicates are equal
  ci <- boot.ci(boot_results, type = "perc")
  if (is.null(ci)) {
    na_result$correlation <- boot_results$t0
    return(na_result)
  }

  data.frame(
    correlation = boot_results$t0,
    # Elements 4 and 5 of the perc row are the lower and upper limits
    ci_lower = ci$perc[4],
    ci_upper = ci$perc[5],
    n = n_complete
  )
}

# Bootstrap every EOHI x calibration pair with the given method.
# Returns one row per pair (eohi_var varying fastest) with the columns
# correlation, ci_lower, ci_upper, n, eohi_var, cal_var and method; the
# estimates are rounded to 5 decimal places.
bootstrap_eohi_cal <- function(data, method) {
  expand.grid(
    eohi_var = eohi_vars,
    cal_var = cal_vars,
    stringsAsFactors = FALSE
  ) %>%
    pmap_dfr(function(eohi_var, cal_var) {
      result <- bootstrap_correlation(
        data, eohi_var, cal_var,
        method = method, R = 1000
      )
      result$eohi_var <- eohi_var
      result$cal_var <- cal_var
      result$method <- method
      result
    }) %>%
    mutate(across(c(correlation, ci_lower, ci_upper), ~round(., 5)))
}

# Compute bootstrap CIs for all EOHI x Calibration correlations.
# df_complete (listwise deletion) is used rather than df so that r, CI and n
# describe the same sample as the rcorr() p-values joined in the summary
# table below.
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- bootstrap_eohi_cal(df_complete, "pearson")
print(bootstrap_results_pearson)

cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- bootstrap_eohi_cal(df_complete, "spearman")
print(bootstrap_results_spearman)

####==== SUMMARY TABLE ====

# p-values for every EOHI x calibration pair, looked up from the rcorr()
# matrices. A two-column character matrix indexes a matrix by
# (row name, column name) pairs.
pair_p_values <- expand.grid(
  eohi_var = eohi_vars,
  cal_var = cal_vars,
  stringsAsFactors = FALSE
)
pair_index <- cbind(pair_p_values$eohi_var, pair_p_values$cal_var)
pair_p_values$pearson_p <- round(p_matrix_pearson[pair_index], 5)
pair_p_values$spearman_p <- round(p_matrix_spearman[pair_index], 5)

# Create comprehensive summary table
summary_table <- bootstrap_results_pearson %>%
  select(
    eohi_var, cal_var,
    pearson_r = correlation,
    pearson_ci_lower = ci_lower,
    pearson_ci_upper = ci_upper,
    n
  ) %>%
  left_join(
    bootstrap_results_spearman %>%
      select(
        eohi_var, cal_var,
        spearman_rho = correlation,
        spearman_ci_lower = ci_lower,
        spearman_ci_upper = ci_upper
      ),
    by = c("eohi_var", "cal_var")
  ) %>%
  # Add p-values
  left_join(pair_p_values, by = c("eohi_var", "cal_var"))

cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)

# Save results to CSV (relative to the current working directory)
write.csv(
  summary_table,
  "eohi_calibration_correlations_summary.csv",
  row.names = FALSE
)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")

####==== EFFECT SIZES (Cohen's conventions) ====

cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")

# Label correlation coefficients using the cut-offs printed above:
# |r| >= 0.50 "Large", >= 0.30 "Medium", >= 0.10 "Small", else "Negligible".
# A missing coefficient stays NA rather than being reported as "Negligible".
#
# r: numeric vector of correlation coefficients.
# Returns a character vector of the same length as r.
classify_effect_size <- function(r) {
  magnitude <- abs(r)
  case_when(
    is.na(magnitude) ~ NA_character_,
    magnitude >= 0.50 ~ "Large",
    magnitude >= 0.30 ~ "Medium",
    magnitude >= 0.10 ~ "Small",
    TRUE ~ "Negligible"
  )
}

# Categorize effect sizes
summary_table_with_effects <- summary_table %>%
  mutate(
    pearson_effect_size = classify_effect_size(pearson_r),
    spearman_effect_size = classify_effect_size(spearman_rho)
  )

cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(
  summary_table_with_effects %>%
    select(
      eohi_var, cal_var,
      pearson_r, pearson_effect_size,
      spearman_rho, spearman_effect_size
    )
)