# Assumption Checks Before Cronbach's Alpha Analysis
# Run this BEFORE the main reliability analysis
library(psych)
|
|
library(corrplot)
|
|
library(ggplot2)
|
|
|
|
# Read the data
|
|
data <- read.csv("exp1.csv")
|
|
|
|
# Define scale variables
|
|
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
|
|
"NPastDiff_pref_nap", "NPastDiff_pref_travel")
|
|
|
|
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
|
|
"NPastDiff_pers_anxious", "NPastDiff_pers_complex")
|
|
|
|
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
|
|
"NPastDiff_val_performance", "NPastDiff_val_justice")
|
|
|
|
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
|
|
"NPastDiff_life_important", "NPastDiff_life_change")
|
|
|
|
# Function to check assumptions for a scale
|
|
check_assumptions <- function(data, var_names, scale_name) {
|
|
cat("\n", "="*60, "\n")
|
|
cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
|
|
cat("="*60, "\n")
|
|
|
|
# Get scale data
|
|
scale_data <- data[, var_names]
|
|
|
|
# 1. Sample size check
|
|
complete_cases <- sum(complete.cases(scale_data))
|
|
cat("1. SAMPLE SIZE CHECK:\n")
|
|
cat(" Total participants:", nrow(data), "\n")
|
|
cat(" Complete cases:", complete_cases, "\n")
|
|
cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
|
|
|
|
if(complete_cases < 30) {
|
|
cat(" WARNING: Sample size too small for reliable alpha estimates\n")
|
|
return(FALSE)
|
|
}
|
|
|
|
# 2. Missing data check
|
|
cat("\n2. MISSING DATA CHECK:\n")
|
|
missing_counts <- colSums(is.na(scale_data))
|
|
missing_pct <- round(missing_counts / nrow(data) * 100, 2)
|
|
cat(" Missing data by item:\n")
|
|
for(i in 1:length(var_names)) {
|
|
cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
|
|
}
|
|
|
|
max_missing <- max(missing_pct)
|
|
cat(" Maximum missing:", max_missing, "%\n")
|
|
cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
|
|
|
|
# 3. Use only complete cases for remaining checks
|
|
complete_data <- scale_data[complete.cases(scale_data), ]
|
|
|
|
# 4. Normality check (Shapiro-Wilk test on first item as example)
|
|
cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
|
|
if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
|
|
shapiro_result <- shapiro.test(complete_data[, 1])
|
|
cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
|
|
cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
|
|
} else {
|
|
cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
|
|
}
|
|
|
|
# 5. Inter-item correlations check
|
|
cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
|
|
cor_matrix <- cor(complete_data)
|
|
|
|
# Get off-diagonal correlations
|
|
cor_matrix[lower.tri(cor_matrix)] <- NA
|
|
diag(cor_matrix) <- NA
|
|
cors <- as.vector(cor_matrix)
|
|
cors <- cors[!is.na(cors)]
|
|
|
|
positive_cors <- sum(cors > 0)
|
|
strong_cors <- sum(cors > 0.30)
|
|
negative_cors <- sum(cors < 0)
|
|
|
|
cat(" Total correlations:", length(cors), "\n")
|
|
cat(" Positive correlations:", positive_cors, "\n")
|
|
cat(" Strong correlations (>0.30):", strong_cors, "\n")
|
|
cat(" Negative correlations:", negative_cors, "\n")
|
|
cat(" Mean correlation:", round(mean(cors), 4), "\n")
|
|
cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
|
|
|
|
if(negative_cors > 0) {
|
|
cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
|
|
}
|
|
if(strong_cors / length(cors) < 0.5) {
|
|
cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
|
|
}
|
|
|
|
# 6. Item variance check
|
|
cat("\n5. ITEM VARIANCE CHECK:\n")
|
|
item_vars <- apply(complete_data, 2, var)
|
|
var_ratio <- max(item_vars) / min(item_vars)
|
|
cat(" Item variances:", round(item_vars, 4), "\n")
|
|
cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
|
|
cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
|
|
|
|
# 7. Outlier check
|
|
cat("\n6. OUTLIER CHECK:\n")
|
|
# Check for multivariate outliers using Mahalanobis distance
|
|
if(nrow(complete_data) > ncol(complete_data)) {
|
|
mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
|
|
outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
|
|
outliers <- sum(mahal_dist > outlier_threshold)
|
|
cat(" Multivariate outliers (p<0.001):", outliers, "\n")
|
|
cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
|
|
}
|
|
|
|
# 8. Summary recommendation
|
|
cat("\n7. OVERALL RECOMMENDATION:\n")
|
|
issues <- 0
|
|
if(complete_cases < 30) issues <- issues + 1
|
|
if(max_missing >= 20) issues <- issues + 1
|
|
if(negative_cors > 0) issues <- issues + 1
|
|
if(var_ratio >= 4) issues <- issues + 1
|
|
|
|
if(issues == 0) {
|
|
cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
|
|
} else if(issues <= 2) {
|
|
cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
|
|
} else {
|
|
cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
|
|
}
|
|
|
|
return(TRUE)
|
|
}
|
|
|
|
# Check assumptions for all past scales
|
|
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
|
|
cat("\nData: exp1.csv")
|
|
cat("\nTotal sample size:", nrow(data))
|
|
|
|
check_assumptions(data, past_pref_vars, "Past Preferences")
|
|
check_assumptions(data, past_pers_vars, "Past Personality")
|
|
check_assumptions(data, past_val_vars, "Past Values")
|
|
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
|
|
|
|
# Quick check of future scales (you can expand this)
|
|
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
|
|
"NFutDiff_pref_nap", "NFutDiff_pref_travel")
|
|
|
|
check_assumptions(data, fut_pref_vars, "Future Preferences")
|
|
|
|
cat("\n", "="*60, "\n")
|
|
cat("GENERAL GUIDELINES:\n")
|
|
cat("="*60, "\n")
|
|
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
|
|
cat("⚠️ If some assumptions are violated, interpret with caution\n")
|
|
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
|
|
cat(" - Omega coefficient (more robust to violations)\n")
|
|
cat(" - Split-half reliability\n")
|
|
cat(" - Test-retest reliability\n")
|
|
cat(" - Factor analysis to check dimensionality\n")
|