# Assumption Checks Before Cronbach's Alpha Analysis
# Run this BEFORE the main reliability analysis

library(psych)
library(corrplot)
library(ggplot2)

# Read the data
data <- read.csv("exp1.csv")

# Define scale variables
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
                    "NPastDiff_pref_nap", "NPastDiff_pref_travel")
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
                    "NPastDiff_pers_anxious", "NPastDiff_pers_complex")
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
                   "NPastDiff_val_performance", "NPastDiff_val_justice")
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
                    "NPastDiff_life_important", "NPastDiff_life_change")

# Function to check assumptions for a scale
check_assumptions <- function(data, var_names, scale_name) {
  divider <- strrep("=", 60)  # R has no string repetition operator, so build the divider with strrep()
  cat("\n", divider, "\n")
  cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
  cat(divider, "\n")

  # Get scale data
  scale_data <- data[, var_names]

  # 1. Sample size check
  complete_cases <- sum(complete.cases(scale_data))
  cat("1. SAMPLE SIZE CHECK:\n")
  cat("   Total participants:", nrow(data), "\n")
  cat("   Complete cases:", complete_cases, "\n")
  cat("   Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
  if (complete_cases < 30) {
    cat("   WARNING: Sample size too small for reliable alpha estimates\n")
    return(FALSE)
  }

  # 2. Missing data check
  cat("\n2. MISSING DATA CHECK:\n")
  missing_counts <- colSums(is.na(scale_data))
  missing_pct <- round(missing_counts / nrow(data) * 100, 2)
  cat("   Missing data by item:\n")
  for (i in seq_along(var_names)) {
    cat("    ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
  }
  max_missing <- max(missing_pct)
  cat("   Maximum missing:", max_missing, "%\n")
  cat("   Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")

  # 3. Use only complete cases for the remaining checks
  complete_data <- scale_data[complete.cases(scale_data), ]

  # 4. Normality check (Shapiro-Wilk test on first item as an example)
  cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
  if (nrow(complete_data) <= 5000) {  # Shapiro-Wilk has a sample size limit
    shapiro_result <- shapiro.test(complete_data[, 1])
    cat("   p-value:", round(shapiro_result$p.value, 4), "\n")
    cat("   Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
  } else {
    cat("   Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
  }

  # 5. Inter-item correlations check
  cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
  cor_matrix <- cor(complete_data)
  # Keep only the off-diagonal (upper-triangle) correlations
  cor_matrix[lower.tri(cor_matrix)] <- NA
  diag(cor_matrix) <- NA
  cors <- as.vector(cor_matrix)
  cors <- cors[!is.na(cors)]
  positive_cors <- sum(cors > 0)
  strong_cors <- sum(cors > 0.30)
  negative_cors <- sum(cors < 0)
  cat("   Total correlations:", length(cors), "\n")
  cat("   Positive correlations:", positive_cors, "\n")
  cat("   Strong correlations (>0.30):", strong_cors, "\n")
  cat("   Negative correlations:", negative_cors, "\n")
  cat("   Mean correlation:", round(mean(cors), 4), "\n")
  cat("   Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
  if (negative_cors > 0) {
    cat("   ⚠️ WARNING: Negative correlations suggest potential issues\n")
  }
  if (strong_cors / length(cors) < 0.5) {
    cat("   ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
  }

  # 6. Item variance check
  cat("\n5. ITEM VARIANCE CHECK:\n")
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat("   Item variances:", round(item_vars, 4), "\n")
  cat("   Variance ratio (max/min):", round(var_ratio, 4), "\n")
  cat("   Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")

  # 7. Outlier check
  cat("\n6. OUTLIER CHECK:\n")
  # Check for multivariate outliers using Mahalanobis distance
  if (nrow(complete_data) > ncol(complete_data)) {
    mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
    outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
    outliers <- sum(mahal_dist > outlier_threshold)
    cat("   Multivariate outliers (p<0.001):", outliers, "\n")
    cat("   Acceptable (<5%)?", ifelse(outliers / nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
  }

  # 8. Summary recommendation
  cat("\n7. OVERALL RECOMMENDATION:\n")
  issues <- 0
  if (complete_cases < 30) issues <- issues + 1
  if (max_missing >= 20) issues <- issues + 1
  if (negative_cors > 0) issues <- issues + 1
  if (var_ratio >= 4) issues <- issues + 1
  if (issues == 0) {
    cat("   ✓ PROCEED with Cronbach's alpha analysis\n")
  } else if (issues <= 2) {
    cat("   ⚠️ PROCEED with CAUTION - some assumptions violated\n")
  } else {
    cat("   ✗ CONSIDER alternatives or data cleaning before proceeding\n")
  }

  return(TRUE)
}

# Check assumptions for all past scales
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
cat("\nData: exp1.csv")
cat("\nTotal sample size:", nrow(data))

check_assumptions(data, past_pref_vars, "Past Preferences")
check_assumptions(data, past_pers_vars, "Past Personality")
check_assumptions(data, past_val_vars, "Past Values")
check_assumptions(data, past_life_vars, "Past Life Satisfaction")

# Quick check of future scales (you can expand this)
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
                   "NFutDiff_pref_nap", "NFutDiff_pref_travel")
check_assumptions(data, fut_pref_vars, "Future Preferences")

cat("\n", strrep("=", 60), "\n")
cat("GENERAL GUIDELINES:\n")
cat(strrep("=", 60), "\n")
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
cat("⚠️ If some assumptions are violated, interpret with caution\n")
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
cat("   - Omega coefficient (more robust to violations)\n")
cat("   - Split-half reliability\n")
cat("   - Test-retest reliability\n")
cat("   - Factor analysis to check dimensionality\n")
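# ------------------------------------------------------------------
# Hedged sketch of the follow-up analysis (an assumption, not the author's
# verified workflow): the "main reliability analysis" this script precedes is
# not included in this file, so the function below shows one plausible way to
# run it with the psych package, along with two of the alternatives listed in
# the guidelines (split-half reliability, factor analysis). run_reliability()
# and its arguments are hypothetical names introduced here for illustration.
run_reliability <- function(data, var_names, scale_name) {
  # Use complete cases only, mirroring the assumption checks above
  scale_data <- data[complete.cases(data[, var_names]), var_names]

  # Cronbach's alpha via psych::alpha(); $total holds raw_alpha, std.alpha, average_r
  alpha_res <- psych::alpha(scale_data)
  cat("\n", scale_name, "- raw alpha:", round(alpha_res$total$raw_alpha, 3), "\n")

  # Split-half reliability estimates as a cross-check
  print(psych::splitHalf(scale_data))

  # One-factor solution to inspect dimensionality
  fa_res <- psych::fa(scale_data, nfactors = 1)
  print(fa_res$loadings)

  # McDonald's omega is also available via psych::omega(scale_data), but note
  # that its default extracts 3 factors, which may be too many for a 5-item scale.

  invisible(alpha_res)
}

# Example usage (assumes `data` and the *_vars vectors defined above):
# run_reliability(data, past_pref_vars, "Past Preferences")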