# Spearman correlation analysis of ehi1.csv.
#
# Steps:
#   1. Read the data, keep the analysis variables, and drop respondents whose
#      demo_sex is "Prefer not to say".
#   2. Derive a sex dummy, a mean-centred age, and a numeric education rank.
#   3. Compute Spearman correlations with unadjusted p-values and print them.
#   4. Write the r and p matrices to CSV in the working directory.

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

library(dplyr)
library(psych)

# NOTE(review): machine-specific absolute path. The script only runs where
# this directory exists; consider a project-relative path instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

# filter() keeps only rows where the condition is TRUE, so rows with a missing
# demo_sex are dropped together with the "Prefer not to say" rows.
data <- df %>%
  select(
    eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
    AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
    cal_selfActual, cal_global
  ) %>%
  filter(demo_sex != "Prefer not to say")

# Missing-value count and storage class of every retained column.
print(colSums(is.na(data)))
print(vapply(
  data,
  function(column) paste(class(column), collapse = "/"),
  character(1)
))

# Sex dummy ----

# Create dummy variable for sex (0 = Male, 1 = Female).
# NOTE(review): every non-missing value other than "Female" is coded 0, so any
# remaining category besides Male would be counted as Male. Check the
# cross-tabulation printed below.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)

# Verify the dummy coding.
print(table(data$demo_sex, data$sex_dummy))

# Descriptives ----

# Descriptives for age.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))

# Center demo_age_1 (subtract the mean).
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)

# Verify the centering.
print(summary(data$age_centered))

# Descriptives for sex (frequency table and proportions).
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))

# Descriptives for sex dummy variable.
print(table(data$sex_dummy))

# Education rank ----

# Convert edu3 to an ordered factor, then to its integer rank (1, 2, 3) for
# the correlations. Values outside the three listed levels become NA.
data$edu3 <- factor(
  data$edu3,
  levels = c("HS_TS", "C_Ug", "grad_prof"),
  ordered = TRUE
)
data$edu_num <- as.numeric(data$edu3)

# Check the numeric conversion.
print(table(data$edu_num, useNA = "ifany"))

# Verify the conversion against the original labels.
print(table(data$edu3, data$edu_num, useNA = "ifany"))

# Correlation matrix ----

# Select numeric variables for the correlation matrix.
numeric_vars <- data %>%
  select(
    eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num,
    AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
    cal_selfActual, cal_global
  )

# Spearman correlation matrix on complete cases only.
# NOTE(review): this matrix uses listwise deletion, whereas corr.test() below
# uses pairwise deletion, so the two printed matrices can differ when data are
# missing. Only the corr.test() results are saved.
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Spearman correlations with significance tests, no multiplicity adjustment.
# use = "pairwise" is the corr.test() default, written out for clarity.
cor_test <- corr.test(
  numeric_vars,
  use = "pairwise",
  method = "spearman",
  adjust = "none"
)

# Print correlation matrix.
print(round(cor_test$r, 3))

# Print p-values.
print(round(cor_test$p, 3))

# Print one "<var> vs <var> : r = ..., p = ..." line for cell (i, j).
report_pair <- function(i, j) {
  cat(
    colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
    ": r =", round(cor_test$r[i, j], 3),
    ", p =", round(cor_test$p[i, j], 3), "\n"
  )
}

# Print all correlations with r and p values (for reporting). Each pair is
# listed in both directions; the diagonal is skipped.
for (i in seq_len(nrow(cor_test$r))) {
  for (j in seq_len(ncol(cor_test$r))) {
    if (i != j) {
      report_pair(i, j)
    }
  }
}

# Also print a summary of the significant correlations (p < .05).
# The diagonal is removed by position rather than by testing p != 0, because
# an off-diagonal p-value can itself be numerically 0 and must be kept.
sig_cors <- which(cor_test$p < 0.05, arr.ind = TRUE)
sig_cors <- sig_cors[sig_cors[, 1] != sig_cors[, 2], , drop = FALSE]

for (k in seq_len(nrow(sig_cors))) {
  report_pair(sig_cors[k, 1], sig_cors[k, 2])
}

# Save correlation matrix and p-values to CSV files.
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)