195 lines
6.7 KiB
R
195 lines
6.7 KiB
R
# ---- Session setup ----
# A scipen this large makes R print numbers in fixed notation (0.00001)
# instead of scientific notation (1e-05).
options(scipen = 999)

# Every input and output file in this script is referenced by bare file name,
# so it resolves against the working directory. Switch to the project folder
# when it exists; on any other machine keep the current directory (and say
# so) instead of aborting before anything has run.
project_dir <- "C:/Users/irina/Documents/DND/EOHI/eohi1"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message(
    "Project directory not found: ", project_dir,
    "\nUsing current working directory: ", getwd()
  )
}

# Load required libraries
library(corrplot) # corrplot()
library(Hmisc)    # rcorr()
library(psych)    # describe(); attached after Hmisc, so this one is used

# Load the data
exp1_data <- read.csv("exp1.csv")

# Define the two sets of variables
# Set 1: the eohiDGEN_* and ehi_* columns.
set1_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life",
  "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean",
  "ehi_global_mean"
)
# Set 2: the AOT and CRT columns.
set2_vars <- c("AOT_total", "CRT_correct", "CRT_int")

# Fail early, naming the offending columns, instead of letting the subset
# below stop with a generic "undefined columns selected" error.
analysis_vars <- c(set1_vars, set2_vars)
missing_vars <- setdiff(analysis_vars, names(exp1_data))
if (length(missing_vars) > 0) {
  stop(
    "exp1.csv is missing required column(s): ",
    paste(missing_vars, collapse = ", "),
    call. = FALSE
  )
}

# Create subset with only the variables of interest
correlation_data <- exp1_data[, analysis_vars, drop = FALSE]

# Every later step (normality tests, lm, cor, rcorr) needs numeric input; a
# column read as text would otherwise only fail further down the script.
is_numeric_col <- vapply(correlation_data, is.numeric, logical(1))
if (!all(is_numeric_col)) {
  stop(
    "Non-numeric column(s) in exp1.csv: ",
    paste(names(correlation_data)[!is_numeric_col], collapse = ", "),
    call. = FALSE
  )
}

# ===== NORMALITY CHECKS =====
# Shapiro-Wilk tests for normality.
# shapiro.test() only accepts between 3 and 5000 non-missing values and stops
# with an error otherwise (it also errors when all values are identical).
# Variables that cannot be tested are reported and skipped so that one
# degenerate column does not abort the whole script.
for (var in names(correlation_data)) {
  values <- correlation_data[[var]]
  values <- values[!is.na(values)]
  n_obs <- length(values)

  if (n_obs < 3 || n_obs > 5000) {
    cat(sprintf(
      "%s: Shapiro-Wilk skipped (n = %d, must be between 3 and 5000)\n",
      var, n_obs
    ))
    next
  }

  # Catch any remaining error (e.g. a constant column) and report its message.
  shapiro_result <- tryCatch(shapiro.test(values), error = function(e) e)
  if (inherits(shapiro_result, "error")) {
    cat(sprintf(
      "%s: Shapiro-Wilk skipped (%s)\n",
      var, conditionMessage(shapiro_result)
    ))
    next
  }

  # The p-value is a single number, so a plain if/else is enough here.
  verdict <- if (shapiro_result$p.value < 0.05) "(NOT normal)" else "(normal)"
  cat(sprintf(
    "%s: Shapiro-Wilk p = %.5f %s\n",
    var, shapiro_result$p.value, verdict
  ))
}

# Kolmogorov-Smirnov test for normality
# Each column is compared against a normal distribution whose mean and sd are
# that column's own sample estimates (missing values ignored).
# NOTE(review): the reference distribution's parameters are estimated from the
# same data being tested; confirm the reported p-values are acceptable for
# that use.
verdict_labels <- c("(normal)", "(NOT normal)")

for (col_name in names(correlation_data)) {
  column <- correlation_data[[col_name]]
  col_mean <- mean(column, na.rm = TRUE)
  col_sd <- sd(column, na.rm = TRUE)

  ks_fit <- ks.test(column, "pnorm", mean = col_mean, sd = col_sd)
  ks_p <- ks_fit$p.value

  # FALSE -> first label, TRUE -> second label.
  verdict <- verdict_labels[(ks_p < 0.05) + 1L]
  cat(sprintf("%s: KS p = %.5f %s\n", col_name, ks_p, verdict))
}

# Visual normality checks
# For every variable: a density-scaled histogram with the fitted normal curve
# on top, followed by a normal Q-Q plot. Everything goes to
# normality_plots.pdf, laid out 2 x 4 panels per page.
pdf("normality_plots.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))

for (col_name in names(correlation_data)) {
  column <- correlation_data[[col_name]]
  col_mean <- mean(column, na.rm = TRUE)
  col_sd <- sd(column, na.rm = TRUE)

  # Histogram with normal curve overlay
  hist(
    column,
    main = paste("Histogram:", col_name),
    xlab = col_name,
    freq = FALSE
  )
  curve(
    dnorm(x, mean = col_mean, sd = col_sd),
    add = TRUE, col = "red", lwd = 2
  )

  # Q-Q plot with reference line
  qqnorm(column, main = paste("Q-Q Plot:", col_name))
  qqline(column, col = "red", lwd = 2)
}

dev.off()

# ===== LINEARITY CHECKS =====

# Draw one scatter plot per (x_var, y_var) pair into a PDF and print the R²
# of the straight-line fit for each pair.
#
# Each panel shows the raw points, the least-squares line (solid red) and a
# LOESS smooth (dashed blue); a blue curve that departs clearly from the red
# line suggests a non-linear relationship.
#
# data:   data frame holding all columns named in x_vars and y_vars.
# x_vars: character vector of column names plotted on the x axis.
# y_vars: character vector of column names plotted on the y axis.
# file:   path of the PDF to write (3 x 5 panels per page).
# Returns NULL invisibly; called for its plots and printed output.
plot_linearity_checks <- function(data, x_vars, y_vars, file) {
  pdf(file, width = 15, height = 10)
  # Close the device even when a fit or plot call fails, so a failure does
  # not leave a half-written PDF open as the active graphics device.
  on.exit(dev.off(), add = TRUE)
  par(mfrow = c(3, 5))

  for (x_var in x_vars) {
    for (y_var in y_vars) {
      pair <- data.frame(x = data[[x_var]], y = data[[y_var]])

      # Scatter plot with regression line
      plot(
        pair$x, pair$y,
        main = paste(x_var, "vs", y_var),
        xlab = x_var, ylab = y_var, pch = 16, cex = 0.6
      )

      # Add linear regression line
      lm_fit <- lm(y ~ x, data = pair)
      abline(lm_fit, col = "red", lwd = 2)

      # Add LOESS smooth line for non-linear pattern detection, evaluated on
      # an even grid of 100 points across the observed x range.
      loess_fit <- loess(y ~ x, data = pair)
      x_seq <- seq(
        min(pair$x, na.rm = TRUE),
        max(pair$x, na.rm = TRUE),
        length.out = 100
      )
      loess_pred <- predict(loess_fit, newdata = data.frame(x = x_seq))
      lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)

      # Report R-squared for the linear fit
      r_squared <- summary(lm_fit)$r.squared
      cat(sprintf("%s vs %s: R² = %.4f\n", x_var, y_var, r_squared))
    }
  }

  invisible(NULL)
}

# Check linearity between variable pairs
plot_linearity_checks(
  correlation_data, set1_vars, set2_vars, "linearity_plots.pdf"
)

# Residual analysis for linearity
# For every set-1 / set-2 pair, regress the set-2 variable on the set-1
# variable and plot residuals against fitted values. The red line marks zero;
# the blue line is a lowess smooth of the residuals.
# Output: residual_plots.pdf, 3 x 5 panels per page.
pdf("residual_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))

for (predictor in set1_vars) {
  for (outcome in set2_vars) {
    pair_fit <- lm(correlation_data[[outcome]] ~ correlation_data[[predictor]])
    fit_values <- fitted(pair_fit)
    fit_resid <- residuals(pair_fit)

    plot(
      fit_values, fit_resid,
      main = paste("Residuals:", predictor, "vs", outcome),
      xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6
    )
    abline(h = 0, col = "red", lwd = 2)

    # Add smooth line to residuals
    lines(lowess(fit_values, fit_resid), col = "blue", lwd = 2)
  }
}

dev.off()

# Calculate correlation matrices (both Pearson and Spearman)
# use = "pairwise.complete.obs": with cor()'s default handling, a single
# missing value in a variable turns every correlation involving that variable
# into NA. The normality checks in this script already allow for missing
# values (na.rm = TRUE), and rcorr(), which supplies the p-values, deletes
# missing values pairwise, so the coefficients are computed from each pair's
# complete observations as well.
cor_matrix_pearson <- cor(
  correlation_data,
  use = "pairwise.complete.obs",
  method = "pearson"
)
cor_matrix_spearman <- cor(
  correlation_data,
  use = "pairwise.complete.obs",
  method = "spearman"
)

# Use Spearman as primary method
cor_matrix <- cor_matrix_spearman

# Print correlation matrices with 5 decimal places
print(round(cor_matrix_spearman, 5))

# Separate correlations between the two sets (Spearman):
# rows are set-1 variables, columns are set-2 variables.
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))

# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]

# Statistical significance tests (Spearman)
# rcorr() returns, among other things, a matrix P of p-values indexed by
# variable name on both dimensions.
cor_test_results_spearman <- rcorr(
  as.matrix(correlation_data),
  type = "spearman"
)

# Print the p-value of every set-1 x set-2 pair, one line per pair.
spearman_p <- cor_test_results_spearman$P
for (row_var in set1_vars) {
  for (col_var in set2_vars) {
    cat(sprintf(
      "%s vs %s: p = %.5f\n",
      row_var, col_var, spearman_p[row_var, col_var]
    ))
  }
}

# Create correlation plots for both methods

# Write one colour-coded correlation plot (upper triangle, coefficients
# printed in the cells, variables ordered by hierarchical clustering) to a
# PDF file.
#
# cor_mat: square correlation matrix with variable names as dimnames.
# file:    path of the PDF to write.
# title:   plot title.
# Returns NULL invisibly; called for the file it writes.
save_correlation_plot <- function(cor_mat, file, title) {
  pdf(file, width = 10, height = 8)
  # Close the device even if corrplot() fails.
  on.exit(dev.off(), add = TRUE)

  corrplot(
    cor_mat,
    method = "color", type = "upper",
    order = "hclust", tl.cex = 0.8, tl.col = "black",
    addCoef.col = "black", number.cex = 0.7,
    title = title,
    # corrplot() draws with zero margins by default, which clips the title;
    # reserve two lines at the top for it.
    mar = c(0, 0, 2, 0)
  )

  invisible(NULL)
}

save_correlation_plot(
  cor_matrix_spearman,
  "correlation_plot_scales_spearman.pdf",
  "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures"
)

save_correlation_plot(
  cor_matrix_pearson,
  "correlation_plot_scales_pearson.pdf",
  "Pearson Correlation Matrix: EOHI/DGEN vs Cognitive Measures"
)

# Summary statistics
# Descriptive statistics for every analysis variable, rounded to 5 decimals
# once and reused for both the console output and the CSV export.
desc_stats <- describe(correlation_data)
desc_stats_rounded <- round(desc_stats, 5)
print(desc_stats_rounded)

# Save results to CSV files
# Each entry maps an output file name to the (rounded) table written to it.
csv_outputs <- list(
  "spearman_correlations.csv" = round(cor_matrix_spearman, 5),
  "pearson_correlations.csv" = round(cor_matrix_pearson, 5),
  "descriptive_statistics.csv" = desc_stats_rounded
)
for (csv_file in names(csv_outputs)) {
  write.csv(csv_outputs[[csv_file]], csv_file)
}

# Save correlation results in a formatted table
# One row per (set-1 variable, set-2 variable) pair holding the Spearman
# coefficient and its p-value. All pairs are written; nothing is filtered by
# significance.
#
# The table is built in one step instead of growing a data frame with rbind()
# inside a loop. Row order is unchanged: set-1 variables vary slowest, set-2
# variables vary fastest.
pair_var1 <- rep(set1_vars, each = length(set2_vars))
pair_var2 <- rep(set2_vars, times = length(set1_vars))

# A two-column character matrix indexes a matrix by (row name, column name),
# picking exactly one cell per pair.
pair_index <- cbind(pair_var1, pair_var2)

cor_results <- data.frame(
  Variable1 = pair_var1,
  Variable2 = pair_var2,
  Spearman_r = unname(cor_matrix_spearman[pair_index]),
  P_value = unname(cor_test_results_spearman$P[pair_index]),
  stringsAsFactors = FALSE
)

write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE)