eohi/.history/eohi1/correlation matrix_20251027134544.r
2025-12-23 15:47:09 -05:00

105 lines
3.3 KiB
R

# ---- Setup and data import ----
# A large scipen makes R prefer fixed over scientific notation when printing.
options(scipen = 999)
library(dplyr)

# Relative file paths used by this script resolve against this directory.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv(file = "ehi1.csv")

# Keep only the analysis columns, then remove respondents who answered
# "Prefer not to say" for sex. Note that filter() also drops rows where the
# comparison evaluates to NA, i.e. rows with a missing demo_sex.
data <- filter(
  select(
    df,
    eohiDGEN_mean, ehi_global_mean,
    demo_sex, demo_age_1, edu3,
    AOT_total, CRT_correct, CRT_int,
    bs_28, bs_easy, bs_hard,
    cal_selfActual, cal_global
  ),
  demo_sex != "Prefer not to say"
)

# Quick data checks: number of missing values per column, then the class of
# each column.
print(colSums(x = is.na(data)))
print(sapply(X = data, FUN = class))
# Dummy-code sex: 1 when demo_sex is "Female", 0 for every other non-missing
# value. 0 therefore means "Male" only if "Male" is the sole remaining
# category; the cross-tabulation below is there to confirm that.
data <- mutate(data, sex_dummy = ifelse(demo_sex == "Female", 1, 0))
# Cross-tabulate the original labels against the dummy to check the coding
print(table(data[["demo_sex"]], data[["sex_dummy"]]))
# ---- Descriptives ----
# Age: summary statistics followed by the standard deviation (NAs ignored)
print(summary(data[["demo_age_1"]]))
print(sd(data[["demo_age_1"]], na.rm = TRUE))

# Mean-center age: subtract the sample mean, which is computed with NAs
# removed (rows with a missing age stay NA in age_centered).
data[["age_centered"]] <- with(
  data,
  demo_age_1 - mean(demo_age_1, na.rm = TRUE)
)
# After centering, the mean shown in this summary should be ~0
print(summary(data[["age_centered"]]))

# Sex: frequency table, then the same counts expressed as proportions
sex_counts <- table(data[["demo_sex"]])
print(sex_counts)
print(prop.table(sex_counts))

# Frequencies of the 0/1 sex dummy
print(table(data[["sex_dummy"]]))
# Recode education for use in correlations.
# edu3 becomes an ordered factor with the level order given below, and
# edu_num holds the corresponding level codes (1, 2, 3) as numbers.
# Any edu3 value that is not one of these three labels becomes NA.
edu_levels <- c("HS_TS", "C_Ug", "grad_prof")
data[["edu3"]] <- ordered(data[["edu3"]], levels = edu_levels)
data[["edu_num"]] <- as.numeric(data[["edu3"]])

# Distribution of the numeric codes, including NAs if there are any
print(table(data[["edu_num"]], useNA = "ifany"))
# Labels against codes: each label should map onto exactly one code
print(table(data[["edu3"]], data[["edu_num"]], useNA = "ifany"))
# ---- Correlation matrix ----
# Columns entering the correlation analysis (outcomes, recoded demographics
# and the remaining scale scores).
numeric_vars <- select(
  data,
  eohiDGEN_mean, ehi_global_mean,
  sex_dummy, demo_age_1, edu_num,
  AOT_total, CRT_correct, CRT_int,
  bs_28, bs_easy, bs_hard,
  cal_selfActual, cal_global
)

# Spearman rank correlations. use = "complete.obs" applies listwise deletion:
# a row with a missing value in any selected column is excluded everywhere.
cor_matrix <- cor(x = numeric_vars, method = "spearman", use = "complete.obs")

# Show the matrix rounded to three decimals
print(round(cor_matrix, digits = 3))
# Significance tests for the correlations, using corr.test() from psych
library(psych)

# Spearman correlations with p-values; adjust = "none" requests p-values
# without a multiple-comparison correction.
# NOTE(review): no `use` argument is passed here, so corr.test() applies its
# own default missing-data handling (presumably pairwise deletion), which can
# make these r values differ from the listwise-deleted cor() matrix - confirm.
cor_test <- corr.test(x = numeric_vars, adjust = "none", method = "spearman")

# Correlation coefficients, rounded to three decimals
print(round(cor_test[["r"]], digits = 3))
# Matching p-values, rounded to three decimals (values < 0.0005 print as 0)
print(round(cor_test[["p"]], digits = 3))
# Print every off-diagonal correlation with its r and p value (for reporting).
# Both (i, j) and (j, i) are visited, so each pair of variables is listed
# twice, once in each order.
var_names <- colnames(numeric_vars)
# seq_len() instead of 1:n so an empty matrix yields zero iterations rather
# than iterating over c(1, 0).
for (i in seq_len(nrow(cor_test$r))) {
  for (j in seq_len(ncol(cor_test$r))) {
    if (i == j) {
      next # skip the diagonal (a variable paired with itself)
    }
    cat(var_names[i], "vs", var_names[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}
# Also print significant correlations summary (p < 0.05, off-diagonal only).
#
# The diagonal is excluded by position rather than with `p != 0`: the p-value
# of a very strong correlation can be exactly 0 (floating-point underflow),
# and a `p != 0` test would drop those cells - the strongest effects - from
# the summary together with the diagonal.
# As in the full listing, each significant pair appears once per ordering.
p_values <- cor_test$p
off_diagonal <- row(p_values) != col(p_values)

# Row/column index pairs of the significant cells; which() ignores NA entries.
sig_cors <- which(p_values < 0.05 & off_diagonal, arr.ind = TRUE)

sig_var_names <- colnames(numeric_vars)
# seq_len() gives zero iterations when nothing is significant.
for (k in seq_len(nrow(sig_cors))) {
  row_idx <- sig_cors[k, 1]
  col_idx <- sig_cors[k, 2]
  cat(sig_var_names[row_idx], "vs", sig_var_names[col_idx],
      ": r =", round(cor_test$r[row_idx, col_idx], 3),
      ", p =", round(p_values[row_idx, col_idx], 3), "\n")
}
# ---- Export ----
# Write the corr.test() coefficients and p-values to CSV files in the current
# working directory, keeping the variable names as row names.
write.csv(
  x = cor_test[["r"]],
  file = "correlation_matrix.csv",
  row.names = TRUE
)
write.csv(
  x = cor_test[["p"]],
  file = "correlation_pvalues.csv",
  row.names = TRUE
)

# Report where the output was written
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")