289 lines
9.2 KiB
R
289 lines
9.2 KiB
R
# Load required libraries
|
|
library(Hmisc)
|
|
library(knitr)
|
|
library(dplyr)
|
|
library(corrr)
|
|
library(broom)
|
|
library(purrr)
|
|
library(tidyr)
|
|
library(tibble)
|
|
library(boot)
|
|
|
|
options(scipen = 999)
|
|
|
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
|
# Load data
|
|
df1 <- read.csv("exp1.csv")
|
|
|
|
# Remove columns with all NA values
|
|
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
|
|
|
# Select variables of interest
|
|
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
|
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
|
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
|
|
|
|
# Create dataset with selected variables
|
|
df <- df1[, c(eohi_vars, cal_vars)]
|
|
|
|
# Ensure all selected variables are numeric
|
|
df <- df %>%
|
|
mutate(across(everything(), as.numeric))
|
|
|
|
# Remove rows with any missing values for correlation analysis
|
|
df_complete <- df[complete.cases(df), ]
|
|
|
|
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
|
cat("Total sample size:", nrow(df), "\n")
|
|
|
|
str(df)
|
|
summary(df)
|
|
####==== DESCRIPTIVE STATISTICS ====
|
|
|
|
# Function to compute descriptive statistics
|
|
get_descriptives <- function(data, vars) {
|
|
desc_stats <- data %>%
|
|
select(all_of(vars)) %>%
|
|
summarise(across(everything(), list(
|
|
n = ~sum(!is.na(.)),
|
|
mean = ~mean(., na.rm = TRUE),
|
|
sd = ~sd(., na.rm = TRUE),
|
|
min = ~min(., na.rm = TRUE),
|
|
max = ~max(., na.rm = TRUE),
|
|
median = ~median(., na.rm = TRUE),
|
|
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
|
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
|
))) %>%
|
|
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
|
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
|
pivot_wider(names_from = stat, values_from = value) %>%
|
|
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
|
|
|
return(desc_stats)
|
|
}
|
|
|
|
# Get descriptives for EOHI variables
|
|
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
|
cat("\n=== EOHI Variables Descriptives ===\n")
|
|
print(eohi_descriptives)
|
|
|
|
# Get descriptives for calibration variables
|
|
cal_descriptives <- get_descriptives(df, cal_vars)
|
|
cat("\n=== Calibration Variables Descriptives ===\n")
|
|
print(cal_descriptives)
|
|
|
|
# Check for bimodal or unusual distributions
|
|
hist(df$eohi_pref)
|
|
hist(df$cal_selfActual)
|
|
|
|
# Look for extreme values
|
|
# boxplot(df$eohi_pref)
|
|
# boxplot(df$cal_selfActual)
|
|
|
|
####==== PEARSON CORRELATIONS ====
|
|
|
|
# Compute correlation matrix with p-values
|
|
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
|
|
|
# Extract correlation coefficients
|
|
cor_pearson <- cor_results_pearson$r
|
|
|
|
# Extract p-values
|
|
p_matrix_pearson <- cor_results_pearson$P
|
|
|
|
# Function to add significance stars
|
|
corstars <- function(cor_mat, p_mat) {
|
|
stars <- ifelse(p_mat < 0.001, "***",
|
|
ifelse(p_mat < 0.01, "**",
|
|
ifelse(p_mat < 0.05, "*", "")))
|
|
|
|
# Combine correlation values with stars, rounded to 5 decimal places
|
|
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
|
nrow = nrow(cor_mat))
|
|
|
|
# Set row and column names
|
|
rownames(cor_with_stars) <- rownames(cor_mat)
|
|
colnames(cor_with_stars) <- colnames(cor_mat)
|
|
|
|
return(cor_with_stars)
|
|
}
|
|
|
|
# Apply the function
|
|
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
|
|
|
cat("\n=== PEARSON CORRELATIONS ===\n")
|
|
print(cor_table_pearson, quote = FALSE)
|
|
|
|
# Extract specific correlations between EOHI and calibration variables
|
|
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
|
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
|
|
|
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
|
for(i in 1:nrow(eohi_cal_correlations)) {
|
|
for(j in 1:ncol(eohi_cal_correlations)) {
|
|
cor_val <- eohi_cal_correlations[i, j]
|
|
p_val <- eohi_cal_pvalues[i, j]
|
|
star <- ifelse(p_val < 0.001, "***",
|
|
ifelse(p_val < 0.01, "**",
|
|
ifelse(p_val < 0.05, "*", "")))
|
|
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
|
rownames(eohi_cal_correlations)[i],
|
|
colnames(eohi_cal_correlations)[j],
|
|
cor_val, star, p_val))
|
|
}
|
|
}
|
|
|
|
####==== SPEARMAN CORRELATIONS ====
|
|
|
|
# Compute Spearman correlation matrix with p-values
|
|
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
|
|
|
# Extract correlation coefficients
|
|
cor_spearman <- cor_results_spearman$r
|
|
|
|
# Extract p-values
|
|
p_matrix_spearman <- cor_results_spearman$P
|
|
|
|
# Apply the function
|
|
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
|
|
|
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
|
print(cor_table_spearman, quote = FALSE)
|
|
|
|
# Extract specific correlations between EOHI and calibration variables
|
|
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
|
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
|
|
|
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
|
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
|
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
|
cor_val <- eohi_cal_correlations_spearman[i, j]
|
|
p_val <- eohi_cal_pvalues_spearman[i, j]
|
|
star <- ifelse(p_val < 0.001, "***",
|
|
ifelse(p_val < 0.01, "**",
|
|
ifelse(p_val < 0.05, "*", "")))
|
|
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
|
rownames(eohi_cal_correlations_spearman)[i],
|
|
colnames(eohi_cal_correlations_spearman)[j],
|
|
cor_val, star, p_val))
|
|
}
|
|
}
|
|
|
|
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
|
|
|
# Function to compute correlation with bootstrap CI
|
|
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
|
# Remove missing values
|
|
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
|
|
|
if(nrow(complete_data) < 3) {
|
|
return(data.frame(
|
|
correlation = NA,
|
|
ci_lower = NA,
|
|
ci_upper = NA,
|
|
n = nrow(complete_data)
|
|
))
|
|
}
|
|
|
|
# Bootstrap function
|
|
boot_fun <- function(data, indices) {
|
|
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
|
}
|
|
|
|
# Perform bootstrap
|
|
set.seed(123) # for reproducibility
|
|
boot_results <- boot(complete_data, boot_fun, R = R)
|
|
|
|
# Calculate confidence interval
|
|
ci <- boot.ci(boot_results, type = "perc")
|
|
|
|
return(data.frame(
|
|
correlation = boot_results$t0,
|
|
ci_lower = ci$perc[4],
|
|
ci_upper = ci$perc[5],
|
|
n = nrow(complete_data)
|
|
))
|
|
}
|
|
|
|
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
|
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
|
bootstrap_results_pearson <- expand.grid(
|
|
eohi_var = eohi_vars,
|
|
cal_var = cal_vars,
|
|
stringsAsFactors = FALSE
|
|
) %>%
|
|
pmap_dfr(function(eohi_var, cal_var) {
|
|
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
|
result$eohi_var <- eohi_var
|
|
result$cal_var <- cal_var
|
|
result$method <- "pearson"
|
|
return(result)
|
|
}) %>%
|
|
mutate(
|
|
correlation = round(correlation, 5),
|
|
ci_lower = round(ci_lower, 5),
|
|
ci_upper = round(ci_upper, 5)
|
|
)
|
|
|
|
print(bootstrap_results_pearson)
|
|
|
|
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
|
bootstrap_results_spearman <- expand.grid(
|
|
eohi_var = eohi_vars,
|
|
cal_var = cal_vars,
|
|
stringsAsFactors = FALSE
|
|
) %>%
|
|
pmap_dfr(function(eohi_var, cal_var) {
|
|
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
|
result$eohi_var <- eohi_var
|
|
result$cal_var <- cal_var
|
|
result$method <- "spearman"
|
|
return(result)
|
|
}) %>%
|
|
mutate(
|
|
correlation = round(correlation, 5),
|
|
ci_lower = round(ci_lower, 5),
|
|
ci_upper = round(ci_upper, 5)
|
|
)
|
|
|
|
print(bootstrap_results_spearman)
|
|
|
|
####==== SUMMARY TABLE ====
|
|
|
|
# Create comprehensive summary table
|
|
summary_table <- bootstrap_results_pearson %>%
|
|
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
|
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
|
left_join(
|
|
bootstrap_results_spearman %>%
|
|
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
|
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
|
by = c("eohi_var", "cal_var")
|
|
) %>%
|
|
# Add p-values
|
|
left_join(
|
|
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
|
pmap_dfr(function(eohi_var, cal_var) {
|
|
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
|
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
|
data.frame(
|
|
eohi_var = eohi_var,
|
|
cal_var = cal_var,
|
|
pearson_p = pearson_p,
|
|
spearman_p = spearman_p
|
|
)
|
|
}),
|
|
by = c("eohi_var", "cal_var")
|
|
) %>%
|
|
mutate(
|
|
pearson_p = round(pearson_p, 5),
|
|
spearman_p = round(spearman_p, 5)
|
|
)
|
|
|
|
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
|
print(summary_table)
|
|
|
|
# Save results to CSV
|
|
# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
|
# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|