eohi3 update
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
.history
|
||||||
55
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"name": "Launch R-Workspace",
|
||||||
|
"request": "launch",
|
||||||
|
"debugMode": "workspace",
|
||||||
|
"workingDirectory": "${workspaceFolder}",
|
||||||
|
"splitOverwrittenOutput": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"name": "Debug R-File",
|
||||||
|
"request": "launch",
|
||||||
|
"debugMode": "file",
|
||||||
|
"workingDirectory": "${workspaceFolder}",
|
||||||
|
"file": "${file}",
|
||||||
|
"splitOverwrittenOutput": true,
|
||||||
|
"stopOnEntry": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"name": "Debug R-Function",
|
||||||
|
"request": "launch",
|
||||||
|
"debugMode": "function",
|
||||||
|
"workingDirectory": "${workspaceFolder}",
|
||||||
|
"file": "${file}",
|
||||||
|
"mainFunction": "main",
|
||||||
|
"allowGlobalDebugging": false,
|
||||||
|
"splitOverwrittenOutput": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"name": "Debug R-Package",
|
||||||
|
"request": "launch",
|
||||||
|
"debugMode": "workspace",
|
||||||
|
"workingDirectory": "${workspaceFolder}",
|
||||||
|
"includePackageScopes": true,
|
||||||
|
"loadPackages": [
|
||||||
|
"."
|
||||||
|
],
|
||||||
|
"splitOverwrittenOutput": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"request": "attach",
|
||||||
|
"name": "Attach to R process",
|
||||||
|
"splitOverwrittenOutput": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
anova tables.pptx
Normal file
BIN
correlation tables for presentation.docx
Normal file
118
eohi1/BS_means.vb
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
Option Explicit
|
||||||
|
|
||||||
|
Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||||
|
Dim lastCol As Long
|
||||||
|
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||||
|
Dim m As Variant
|
||||||
|
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||||
|
If IsError(m) Then
|
||||||
|
GetColIndex = 0
|
||||||
|
Else
|
||||||
|
GetColIndex = CLng(m)
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
|
||||||
|
Dim tmp() As Long
|
||||||
|
ReDim tmp(0 To UBound(headers))
|
||||||
|
Dim i As Long, c As Long
|
||||||
|
c = 0
|
||||||
|
For i = LBound(headers) To UBound(headers)
|
||||||
|
Dim colIdx As Long
|
||||||
|
colIdx = GetColIndex(CStr(headers(i)), ws)
|
||||||
|
If colIdx > 0 Then
|
||||||
|
tmp(c) = colIdx
|
||||||
|
c = c + 1
|
||||||
|
End If
|
||||||
|
Next i
|
||||||
|
If c = 0 Then
|
||||||
|
BuildPresentColArray = Array()
|
||||||
|
Else
|
||||||
|
Dim outArr() As Long
|
||||||
|
ReDim outArr(0 To c - 1)
|
||||||
|
For i = 0 To c - 1
|
||||||
|
outArr(i) = tmp(i)
|
||||||
|
Next i
|
||||||
|
BuildPresentColArray = outArr
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
|
||||||
|
Dim i As Long
|
||||||
|
Dim sumVals As Double
|
||||||
|
Dim countVals As Long
|
||||||
|
sumVals = 0
|
||||||
|
countVals = 0
|
||||||
|
If IsArray(colIndexes) Then
|
||||||
|
For i = LBound(colIndexes) To UBound(colIndexes)
|
||||||
|
Dim v As Variant
|
||||||
|
v = ws.Cells(rowIndex, CLng(colIndexes(i))).Value
|
||||||
|
If Not IsError(v) Then
|
||||||
|
If IsNumeric(v) Then
|
||||||
|
sumVals = sumVals + CDbl(v)
|
||||||
|
countVals = countVals + 1
|
||||||
|
End If
|
||||||
|
End If
|
||||||
|
Next i
|
||||||
|
End If
|
||||||
|
If countVals = 0 Then
|
||||||
|
MeanOfRow = CVErr(xlErrNA)
|
||||||
|
Else
|
||||||
|
MeanOfRow = sumVals / countVals
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
|
||||||
|
Dim c As Long
|
||||||
|
c = GetColIndex(headerName, ws)
|
||||||
|
If c = 0 Then
|
||||||
|
Dim lastCol As Long
|
||||||
|
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||||
|
c = lastCol + 1
|
||||||
|
ws.Cells(1, c).Value = headerName
|
||||||
|
End If
|
||||||
|
EnsureOutputColumn = c
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Sub BS_Means()
|
||||||
|
Dim ws As Worksheet
|
||||||
|
Set ws = ThisWorkbook.Sheets(1)
|
||||||
|
|
||||||
|
Dim all28 As Variant
|
||||||
|
all28 = Array( _
|
||||||
|
"lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
|
||||||
|
"spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
|
||||||
|
)
|
||||||
|
|
||||||
|
Dim easy14 As Variant
|
||||||
|
easy14 = Array( _
|
||||||
|
"lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _
|
||||||
|
)
|
||||||
|
|
||||||
|
Dim hard14 As Variant
|
||||||
|
hard14 = Array( _
|
||||||
|
"spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
|
||||||
|
)
|
||||||
|
|
||||||
|
Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
|
||||||
|
colsAll = BuildPresentColArray(all28, ws)
|
||||||
|
colsEasy = BuildPresentColArray(easy14, ws)
|
||||||
|
colsHard = BuildPresentColArray(hard14, ws)
|
||||||
|
|
||||||
|
Dim colBS28 As Long, colBSEasy As Long, colBSHard As Long
|
||||||
|
colBS28 = EnsureOutputColumn(ws, "bs_28")
|
||||||
|
colBSEasy = EnsureOutputColumn(ws, "bs_easy")
|
||||||
|
colBSHard = EnsureOutputColumn(ws, "bs_hard")
|
||||||
|
|
||||||
|
Dim lastRow As Long
|
||||||
|
lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||||
|
|
||||||
|
Dim r As Long
|
||||||
|
For r = 2 To lastRow
|
||||||
|
ws.Cells(r, colBS28).Value = MeanOfRow(ws, r, colsAll)
|
||||||
|
ws.Cells(r, colBSEasy).Value = MeanOfRow(ws, r, colsEasy)
|
||||||
|
ws.Cells(r, colBSHard).Value = MeanOfRow(ws, r, colsHard)
|
||||||
|
Next r
|
||||||
|
End Sub
|
||||||
|
|
||||||
|
|
||||||
25
eohi1/DataP 01 - domain mean totals .r
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp1_data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Calculate NPast_mean_total as average of NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
|
||||||
|
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Calculate NFut_mean_total as average of NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
|
||||||
|
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Save the updated data
|
||||||
|
write.csv(exp1_data, "exp1.csv", row.names = FALSE)
|
||||||
|
|
||||||
|
# Display summary of the calculated totals
|
||||||
|
cat("NPast_mean_total summary:\n")
|
||||||
|
summary(exp1_data$NPast_mean_total)
|
||||||
|
cat("\nNFut_mean_total summary:\n")
|
||||||
|
summary(exp1_data$NFut_mean_total)
|
||||||
|
|
||||||
|
# Show first few rows to verify calculations
|
||||||
|
cat("\nFirst 5 rows of calculated totals:\n")
|
||||||
|
print(exp1_data[1:5, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
|
||||||
|
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")])
|
||||||
17
eohi1/E1 - correlation_matrix.csv
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
,eohiDGEN_mean,ehi_global_mean,sex_dummy,demo_age_1,edu_num,AOT_total,CRT_correct,CRT_int,bs_28,bs_easy,bs_hard,cal_selfActual
|
||||||
|
ehi_global_mean,0.31***,,,,,,,,,,,
|
||||||
|
sex_dummy,0.0086,0.044,,,,,,,,,,
|
||||||
|
demo_age_1,-0.12***,-0.19***,0.0094,,,,,,,,,
|
||||||
|
edu_num,-0.0081,0.009,-0.062*,-0.023,,,,,,,,
|
||||||
|
AOT_total,0.11***,0.086***,-0.042,0.11,0.035,,,,,,,
|
||||||
|
CRT_correct,0.068*,0.051,-0.15***,-0.049,0.13***,0.26***,,,,,,
|
||||||
|
CRT_int,-0.058,-0.057,0.16***,0.071*,-0.12***,-0.21***,-0.87***,,,,,
|
||||||
|
bs_28,-0.01,-0.006803247,-0.0022,-0.071*,-0.065*,-0.27***,-0.25***,0.23***,,,,
|
||||||
|
bs_easy,-0.0088,0.014,-0.037,-0.30***,-0.009,-0.31***,-0.094***,0.047,0.61***,,,
|
||||||
|
bs_hard,-0.015,-0.024,0.022,0.095**,-0.085**,-0.15***,-0.24***,0.25***,0.87***,0.18***,,
|
||||||
|
cal_selfActual,0.015,-0.051,-0.15***,0.18***,0.12***,-0.041,0.047,-0.053,0.31***,0.05315529,0.35***,
|
||||||
|
cal_global,0.019,-0.0047,-0.078*,0.031,0.021,-0.16***,-0.15***,0.13***,0.76***,0.37***,0.72***,0.55***
|
||||||
|
,,,,,,,,,,,,
|
||||||
|
*p<0.05,,,,,,,,,,,,
|
||||||
|
**p<0.01,,,,,,,,,,,,
|
||||||
|
***p<0.001,,,,,,,,,,,,
|
||||||
|
18
eohi1/E1 - correlation_pvalues.csv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
,eohiDGEN_mean,ehi_global_mean,sex_dummy,demo_age_1,edu_num,AOT_total,CRT_correct,CRT_int,bs_28,bs_easy,bs_hard,cal_selfActual
|
||||||
|
ehi_global_mean,3.36865E-25,,,,,,,,,,,
|
||||||
|
sex_dummy,0.778486413,0.154074007,,,,,,,,,,
|
||||||
|
demo_age_1,5.04502E-05,1.68576E-10,0.759436702,,,,,,,,,
|
||||||
|
edu_num,0.79326152,0.768960745,0.043945088,0.454222403,,,,,,,,
|
||||||
|
AOT_total,0.000229114,0.004867793,0.168503786,0.000314432,0.259313829,,,,,,,
|
||||||
|
CRT_correct,0.027291283,0.099344285,4.65922E-07,0.108342391,3.46509E-05,4.3812E-18,,,,,,
|
||||||
|
CRT_int,0.05842806,0.065472289,1.53701E-07,0.021280157,6.99182E-05,3.99893E-12,0,,,,,
|
||||||
|
bs_28,0.735913365,0.824825013,0.94333124,0.020232959,0.035132191,1.09048E-18,1.61283E-16,1.17159E-13,,,,
|
||||||
|
bs_easy,0.775943639,0.653590097,0.22951034,3.86076E-23,0.77008947,1.15661E-24,0.002210577,0.123278134,6.2215E-108,,,
|
||||||
|
bs_hard,0.622307404,0.443340859,0.469287124,0.001914319,0.005383608,7.67838E-07,9.71752E-16,2.00781E-16,0,2.79128E-09,,
|
||||||
|
cal_selfActual,0.623530057,0.098794041,8.23097E-07,2.62161E-09,0.000133768,0.183121705,0.125377421,0.088656149,2.63226E-25,0.083521419,5.04214E-32,
|
||||||
|
cal_global,0.543555525,0.879493859,0.011276459,0.318757547,0.487961367,3.0437E-07,8.00711E-07,1.20286E-05,1.4315E-202,3.1393E-35,1.2757E-171,4.92187E-86
|
||||||
|
,,,,,,,,,,,,
|
||||||
|
,,,,,,,,,,,,
|
||||||
|
,p<0.05,,,,,,,,,,,
|
||||||
|
,p<0.01,,,,,,,,,,,
|
||||||
|
,p<0.001,,,,,,,,,,,
|
||||||
|
65
eohi1/EHI reliability.html
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
<html><head><title>EHI Reliability Analysis</title></head><body><h1>EHI Reliability Analysis</h1><h2>Two-Item Reliability Summary</h2><p>Pearson r: 0.34049 (95% CI: [0.28622, 0.39258])</p><p>Spearman r: 0.31052 (95% CI: [0.25516, 0.36386])</p><p>Spearman–Brown / Cronbach's α: 0.50801 (95% CI: [0.44506, 0.56382])</p><h2>Cronbach's Alpha</h2><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> raw_alpha </th>
|
||||||
|
<th style="text-align:right;"> std.alpha </th>
|
||||||
|
<th style="text-align:right;"> G6(smc) </th>
|
||||||
|
<th style="text-align:right;"> average_r </th>
|
||||||
|
<th style="text-align:right;"> S/N </th>
|
||||||
|
<th style="text-align:right;"> ase </th>
|
||||||
|
<th style="text-align:right;"> mean </th>
|
||||||
|
<th style="text-align:right;"> sd </th>
|
||||||
|
<th style="text-align:right;"> median_r </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> </td>
|
||||||
|
<td style="text-align:right;"> 0.3078741 </td>
|
||||||
|
<td style="text-align:right;"> 0.5080098 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
<td style="text-align:right;"> 1.032561 </td>
|
||||||
|
<td style="text-align:right;"> 0.0216938 </td>
|
||||||
|
<td style="text-align:right;"> 0.2755268 </td>
|
||||||
|
<td style="text-align:right;"> 0.98504 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table><h2>Split-Half Reliability</h2><p>Maximum split half reliability: 0.50801</p><h2>Item-Level Statistics</h2><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> n </th>
|
||||||
|
<th style="text-align:right;"> raw.r </th>
|
||||||
|
<th style="text-align:right;"> std.r </th>
|
||||||
|
<th style="text-align:right;"> r.cor </th>
|
||||||
|
<th style="text-align:right;"> r.drop </th>
|
||||||
|
<th style="text-align:right;"> mean </th>
|
||||||
|
<th style="text-align:right;"> sd </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> eohiDGEN_mean </td>
|
||||||
|
<td style="text-align:right;"> 1063 </td>
|
||||||
|
<td style="text-align:right;"> 0.9706327 </td>
|
||||||
|
<td style="text-align:right;"> 0.8186853 </td>
|
||||||
|
<td style="text-align:right;"> 0.4777163 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
<td style="text-align:right;"> 0.4148824 </td>
|
||||||
|
<td style="text-align:right;"> 1.740598 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 1063 </td>
|
||||||
|
<td style="text-align:right;"> 0.5566839 </td>
|
||||||
|
<td style="text-align:right;"> 0.8186853 </td>
|
||||||
|
<td style="text-align:right;"> 0.4777163 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
<td style="text-align:right;"> 0.1361712 </td>
|
||||||
|
<td style="text-align:right;"> 0.504053 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table></body></html>
|
||||||
BIN
eohi1/Rplots.pdf
Normal file
0
eohi1/accuracy + calibration - 28 items.vb
Normal file
BIN
eohi1/age_DGEN_assumptions.png
Normal file
|
After Width: | Height: | Size: 17 KiB |
BIN
eohi1/age_DGEN_plot.png
Normal file
|
After Width: | Height: | Size: 444 KiB |
BIN
eohi1/age_domain_assumptions.png
Normal file
|
After Width: | Height: | Size: 18 KiB |
BIN
eohi1/age_domain_plot.png
Normal file
|
After Width: | Height: | Size: 530 KiB |
164
eohi1/assumption_checks_before_cronbach.r
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
# Assumption Checks Before Cronbach's Alpha Analysis
|
||||||
|
# Run this BEFORE the main reliability analysis
|
||||||
|
|
||||||
|
library(psych)
|
||||||
|
library(corrplot)
|
||||||
|
library(ggplot2)
|
||||||
|
|
||||||
|
# Read the data
|
||||||
|
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Define scale variables
|
||||||
|
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
|
||||||
|
"NPastDiff_pref_nap", "NPastDiff_pref_travel")
|
||||||
|
|
||||||
|
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
|
||||||
|
"NPastDiff_pers_anxious", "NPastDiff_pers_complex")
|
||||||
|
|
||||||
|
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
|
||||||
|
"NPastDiff_val_performance", "NPastDiff_val_justice")
|
||||||
|
|
||||||
|
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
|
||||||
|
"NPastDiff_life_important", "NPastDiff_life_change")
|
||||||
|
|
||||||
|
# Function to check assumptions for a scale
|
||||||
|
check_assumptions <- function(data, var_names, scale_name) {
|
||||||
|
cat("\n", "="*60, "\n")
|
||||||
|
cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
|
||||||
|
cat("="*60, "\n")
|
||||||
|
|
||||||
|
# Get scale data
|
||||||
|
scale_data <- data[, var_names]
|
||||||
|
|
||||||
|
# 1. Sample size check
|
||||||
|
complete_cases <- sum(complete.cases(scale_data))
|
||||||
|
cat("1. SAMPLE SIZE CHECK:\n")
|
||||||
|
cat(" Total participants:", nrow(data), "\n")
|
||||||
|
cat(" Complete cases:", complete_cases, "\n")
|
||||||
|
cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
|
||||||
|
|
||||||
|
if(complete_cases < 30) {
|
||||||
|
cat(" WARNING: Sample size too small for reliable alpha estimates\n")
|
||||||
|
return(FALSE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2. Missing data check
|
||||||
|
cat("\n2. MISSING DATA CHECK:\n")
|
||||||
|
missing_counts <- colSums(is.na(scale_data))
|
||||||
|
missing_pct <- round(missing_counts / nrow(data) * 100, 2)
|
||||||
|
cat(" Missing data by item:\n")
|
||||||
|
for(i in 1:length(var_names)) {
|
||||||
|
cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
max_missing <- max(missing_pct)
|
||||||
|
cat(" Maximum missing:", max_missing, "%\n")
|
||||||
|
cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
|
||||||
|
|
||||||
|
# 3. Use only complete cases for remaining checks
|
||||||
|
complete_data <- scale_data[complete.cases(scale_data), ]
|
||||||
|
|
||||||
|
# 4. Normality check (Shapiro-Wilk test on first item as example)
|
||||||
|
cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
|
||||||
|
if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
|
||||||
|
shapiro_result <- shapiro.test(complete_data[, 1])
|
||||||
|
cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
|
||||||
|
cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
|
||||||
|
} else {
|
||||||
|
cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# 5. Inter-item correlations check
|
||||||
|
cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
|
||||||
|
cor_matrix <- cor(complete_data)
|
||||||
|
|
||||||
|
# Get off-diagonal correlations
|
||||||
|
cor_matrix[lower.tri(cor_matrix)] <- NA
|
||||||
|
diag(cor_matrix) <- NA
|
||||||
|
cors <- as.vector(cor_matrix)
|
||||||
|
cors <- cors[!is.na(cors)]
|
||||||
|
|
||||||
|
positive_cors <- sum(cors > 0)
|
||||||
|
strong_cors <- sum(cors > 0.30)
|
||||||
|
negative_cors <- sum(cors < 0)
|
||||||
|
|
||||||
|
cat(" Total correlations:", length(cors), "\n")
|
||||||
|
cat(" Positive correlations:", positive_cors, "\n")
|
||||||
|
cat(" Strong correlations (>0.30):", strong_cors, "\n")
|
||||||
|
cat(" Negative correlations:", negative_cors, "\n")
|
||||||
|
cat(" Mean correlation:", round(mean(cors), 4), "\n")
|
||||||
|
cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
|
||||||
|
|
||||||
|
if(negative_cors > 0) {
|
||||||
|
cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
|
||||||
|
}
|
||||||
|
if(strong_cors / length(cors) < 0.5) {
|
||||||
|
cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# 6. Item variance check
|
||||||
|
cat("\n5. ITEM VARIANCE CHECK:\n")
|
||||||
|
item_vars <- apply(complete_data, 2, var)
|
||||||
|
var_ratio <- max(item_vars) / min(item_vars)
|
||||||
|
cat(" Item variances:", round(item_vars, 4), "\n")
|
||||||
|
cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
|
||||||
|
cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
|
||||||
|
|
||||||
|
# 7. Outlier check
|
||||||
|
cat("\n6. OUTLIER CHECK:\n")
|
||||||
|
# Check for multivariate outliers using Mahalanobis distance
|
||||||
|
if(nrow(complete_data) > ncol(complete_data)) {
|
||||||
|
mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
|
||||||
|
outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
|
||||||
|
outliers <- sum(mahal_dist > outlier_threshold)
|
||||||
|
cat(" Multivariate outliers (p<0.001):", outliers, "\n")
|
||||||
|
cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# 8. Summary recommendation
|
||||||
|
cat("\n7. OVERALL RECOMMENDATION:\n")
|
||||||
|
issues <- 0
|
||||||
|
if(complete_cases < 30) issues <- issues + 1
|
||||||
|
if(max_missing >= 20) issues <- issues + 1
|
||||||
|
if(negative_cors > 0) issues <- issues + 1
|
||||||
|
if(var_ratio >= 4) issues <- issues + 1
|
||||||
|
|
||||||
|
if(issues == 0) {
|
||||||
|
cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
|
||||||
|
} else if(issues <= 2) {
|
||||||
|
cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
|
||||||
|
} else {
|
||||||
|
cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check assumptions for all past scales
|
||||||
|
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
|
||||||
|
cat("\nData: exp1.csv")
|
||||||
|
cat("\nTotal sample size:", nrow(data))
|
||||||
|
|
||||||
|
check_assumptions(data, past_pref_vars, "Past Preferences")
|
||||||
|
check_assumptions(data, past_pers_vars, "Past Personality")
|
||||||
|
check_assumptions(data, past_val_vars, "Past Values")
|
||||||
|
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
|
||||||
|
|
||||||
|
# Quick check of future scales (you can expand this)
|
||||||
|
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
|
||||||
|
"NFutDiff_pref_nap", "NFutDiff_pref_travel")
|
||||||
|
|
||||||
|
check_assumptions(data, fut_pref_vars, "Future Preferences")
|
||||||
|
|
||||||
|
cat("\n", "="*60, "\n")
|
||||||
|
cat("GENERAL GUIDELINES:\n")
|
||||||
|
cat("="*60, "\n")
|
||||||
|
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
|
||||||
|
cat("⚠️ If some assumptions are violated, interpret with caution\n")
|
||||||
|
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
|
||||||
|
cat(" - Omega coefficient (more robust to violations)\n")
|
||||||
|
cat(" - Split-half reliability\n")
|
||||||
|
cat(" - Test-retest reliability\n")
|
||||||
|
cat(" - Factor analysis to check dimensionality\n")
|
||||||
|
|
||||||
|
|
||||||
141
eohi1/brierVARS.vb
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||||
|
Dim lastCol As Long
|
||||||
|
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||||
|
On Error Resume Next
|
||||||
|
Dim m As Variant
|
||||||
|
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||||
|
On Error GoTo 0
|
||||||
|
If IsError(m) Then
|
||||||
|
GetCol = 0
|
||||||
|
Else
|
||||||
|
GetCol = CLng(m)
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function NormalizeTruth(ByVal v As Variant) As Variant
|
||||||
|
' Returns True/False or Null if cannot determine
|
||||||
|
If VarType(v) = vbBoolean Then
|
||||||
|
NormalizeTruth = v
|
||||||
|
Exit Function
|
||||||
|
End If
|
||||||
|
If IsError(v) Or IsEmpty(v) Then
|
||||||
|
NormalizeTruth = Null
|
||||||
|
Exit Function
|
||||||
|
End If
|
||||||
|
Dim s As String
|
||||||
|
s = Trim$(UCase$(CStr(v)))
|
||||||
|
Select Case s
|
||||||
|
Case "TRUE", "T", "1", "YES", "Y"
|
||||||
|
NormalizeTruth = True
|
||||||
|
Case "FALSE", "F", "0", "NO", "N"
|
||||||
|
NormalizeTruth = False
|
||||||
|
Case Else
|
||||||
|
NormalizeTruth = Null
|
||||||
|
End Select
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function NormalizeProb01(ByVal v As Variant) As Double
|
||||||
|
' Converts confidence values to [0,1]
|
||||||
|
If IsError(v) Or IsEmpty(v) Then
|
||||||
|
NormalizeProb01 = -1
|
||||||
|
Exit Function
|
||||||
|
End If
|
||||||
|
Dim s As String
|
||||||
|
s = CStr(v)
|
||||||
|
If InStr(s, "%") > 0 Then
|
||||||
|
s = Replace$(s, "%", "")
|
||||||
|
If IsNumeric(s) Then
|
||||||
|
NormalizeProb01 = CDbl(s) / 100#
|
||||||
|
Exit Function
|
||||||
|
End If
|
||||||
|
End If
|
||||||
|
If IsNumeric(v) Then
|
||||||
|
Dim d As Double
|
||||||
|
d = CDbl(v)
|
||||||
|
If d > 1# Then
|
||||||
|
NormalizeProb01 = d / 100#
|
||||||
|
Else
|
||||||
|
NormalizeProb01 = d
|
||||||
|
End If
|
||||||
|
Else
|
||||||
|
NormalizeProb01 = -1
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Sub brierVARS()
|
||||||
|
Dim false_vars As Variant
|
||||||
|
Dim true_vars As Variant
|
||||||
|
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
|
||||||
|
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||||
|
Dim target_headers As Variant
|
||||||
|
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||||
|
Dim ws As Worksheet
|
||||||
|
Set ws = ThisWorkbook.Sheets(1)
|
||||||
|
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||||
|
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||||
|
Dim result As Variant
|
||||||
|
Dim r As Long, j As Long
|
||||||
|
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||||
|
For i = 0 To UBound(false_vars) Step 2
|
||||||
|
srcVar = false_vars(i)
|
||||||
|
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||||
|
colSource1 = GetCol(srcVar, ws)
|
||||||
|
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||||
|
If colSource1 > 0 And colSourceCON > 0 Then
|
||||||
|
For r = 2 To rowCount
|
||||||
|
checkVal = ws.Cells(r, colSource1).Value
|
||||||
|
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||||
|
colTarget = 0
|
||||||
|
For j = 0 To UBound(target_headers)
|
||||||
|
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||||
|
colTarget = GetCol(target_headers(j), ws)
|
||||||
|
Exit For
|
||||||
|
End If
|
||||||
|
Next j
|
||||||
|
If colTarget > 0 Then
|
||||||
|
Dim truth As Variant
|
||||||
|
truth = NormalizeTruth(checkVal)
|
||||||
|
If IsNull(truth) Or val < 0 Then
|
||||||
|
result = CVErr(xlErrNA)
|
||||||
|
ElseIf truth = True Then
|
||||||
|
result = (val - 0#) ^ 2
|
||||||
|
Else
|
||||||
|
result = ((1# - val) - 0#) ^ 2
|
||||||
|
End If
|
||||||
|
ws.Cells(r, colTarget).Value = result
|
||||||
|
End If
|
||||||
|
Next r
|
||||||
|
End If
|
||||||
|
Next i
|
||||||
|
For i = 0 To UBound(true_vars) Step 2
|
||||||
|
srcVar = true_vars(i)
|
||||||
|
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||||
|
colSource1 = GetCol(srcVar, ws)
|
||||||
|
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||||
|
If colSource1 > 0 And colSourceCON > 0 Then
|
||||||
|
For r = 2 To rowCount
|
||||||
|
checkVal = ws.Cells(r, colSource1).Value
|
||||||
|
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||||
|
colTarget = 0
|
||||||
|
For j = 0 To UBound(target_headers)
|
||||||
|
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||||
|
colTarget = GetCol(target_headers(j), ws)
|
||||||
|
Exit For
|
||||||
|
End If
|
||||||
|
Next j
|
||||||
|
If colTarget > 0 Then
|
||||||
|
Dim truth2 As Variant
|
||||||
|
truth2 = NormalizeTruth(checkVal)
|
||||||
|
If IsNull(truth2) Or val < 0 Then
|
||||||
|
result = CVErr(xlErrNA)
|
||||||
|
ElseIf truth2 = True Then
|
||||||
|
result = (val - 1#) ^ 2
|
||||||
|
Else
|
||||||
|
result = ((1# - val) - 1#) ^ 2
|
||||||
|
End If
|
||||||
|
ws.Cells(r, colTarget).Value = result
|
||||||
|
End If
|
||||||
|
Next r
|
||||||
|
End If
|
||||||
|
Next i
|
||||||
|
End Sub
|
||||||
57
eohi1/corr_bs_all.csv
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
group,var_x,var_y,n,r,p
|
||||||
|
BS_vs_Cal,bs_28,cal_global,1063,0.761895605,2.37E-202
|
||||||
|
BS_vs_Cal,bs_easy,cal_global,1063,0.367329651,2.67E-35
|
||||||
|
BS_vs_Cal,bs_hard,cal_global,1063,0.721638057,1.16E-171
|
||||||
|
BS_vs_Cal,bs_28,cal_selfActual,1063,0.312906942,1.41E-25
|
||||||
|
BS_vs_Cal,bs_easy,cal_selfActual,1063,0.054307733,0.07675131
|
||||||
|
BS_vs_Cal,bs_hard,cal_selfActual,1063,0.351630149,2.70E-32
|
||||||
|
BS_vs_EOHI,bs_28,ehi_global_mean,1063,-0.005868845,0.848429656
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_global_mean,1063,0.014251763,0.642549738
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_global_mean,1063,-0.022683413,0.460035927
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_global_mean,1063,-0.004912877,0.872888653
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_global_mean,1063,-0.049727552,0.105147189
|
||||||
|
BS_vs_EOHI,bs_28,ehi_life_mean,1063,-0.007684801,0.802384957
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_life_mean,1063,-0.024372981,0.427293321
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_life_mean,1063,-0.001049006,0.972748582
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_life_mean,1063,-0.002906394,0.924594545
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_life_mean,1063,0.028247567,0.357533744
|
||||||
|
BS_vs_EOHI,bs_28,ehi_pers_mean,1063,-0.00684928,0.823495332
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_pers_mean,1063,-0.009992311,0.744870091
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_pers_mean,1063,-0.013361509,0.663459912
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_pers_mean,1063,-0.009160631,0.765455012
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_pers_mean,1063,-0.082807345,0.006907147
|
||||||
|
BS_vs_EOHI,bs_28,ehi_pref_mean,1063,-0.009153426,0.765634093
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_pref_mean,1063,0.038767348,0.20660862
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_pref_mean,1063,-0.033018506,0.282127883
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_pref_mean,1063,0.009717642,0.751649131
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_pref_mean,1063,-0.044228709,0.149576975
|
||||||
|
BS_vs_EOHI,bs_28,ehi_val_mean,1063,0.011477853,0.708558862
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_val_mean,1063,0.048673437,0.11273791
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_val_mean,1063,-0.008316877,0.786509127
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_val_mean,1063,-0.024447239,0.425886059
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_val_mean,1063,-0.099629152,0.001143742
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_life,1063,-0.011893558,0.69851102
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_life,1063,-0.087676604,0.004226705
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_life,1063,0.036722132,0.231590633
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_life,1063,0.004140068,0.892751642
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_life,1063,0.036877484,0.229620994
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_mean,1063,-0.008492209,0.782120675
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_mean,1063,-0.007458252,0.808095459
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_mean,1063,-0.013570736,0.658521255
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_mean,1063,0.019739833,0.520291178
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_mean,1063,0.016902147,0.582001734
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_pers,1063,-0.013831247,0.652392826
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_pers,1063,-0.013771822,0.653788727
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_pers,1063,-0.013837654,0.652242395
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_pers,1063,-0.010393397,0.735006441
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_pers,1063,-0.028354225,0.355720599
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_pref,1063,0.002305478,0.940152072
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_pref,1063,0.014268945,0.642148877
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_pref,1063,-0.006108935,0.842308668
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_pref,1063,0.024379777,0.427164421
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_pref,1063,0.008387657,0.784736723
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_val,1063,-0.043411125,0.157255062
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_val,1063,-0.058087693,0.058325548
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_val,1063,-0.032241181,0.293618158
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_val,1063,-0.032038903,0.296658895
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_val,1063,0.035471252,0.247887232
|
||||||
|
103
eohi1/correlation matrix.r
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
df <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
data <- df %>%
|
||||||
|
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||||
|
filter(demo_sex != "Prefer not to say")
|
||||||
|
|
||||||
|
print(colSums(is.na(data)))
|
||||||
|
print(sapply(data, class))
|
||||||
|
|
||||||
|
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||||
|
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||||
|
|
||||||
|
# Verify the dummy coding
|
||||||
|
print(table(data$demo_sex, data$sex_dummy))
|
||||||
|
|
||||||
|
#descriptives
|
||||||
|
|
||||||
|
# Descriptives for age
|
||||||
|
print(summary(data$demo_age_1))
|
||||||
|
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||||
|
|
||||||
|
# Center demo_age_1 (subtract the mean)
|
||||||
|
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||||
|
|
||||||
|
# Verify the centering
|
||||||
|
print(summary(data$age_centered))
|
||||||
|
|
||||||
|
# Descriptives for sex (frequency table)
|
||||||
|
print(table(data$demo_sex))
|
||||||
|
print(prop.table(table(data$demo_sex)))
|
||||||
|
|
||||||
|
# Descriptives for sex dummy variable
|
||||||
|
print(table(data$sex_dummy))
|
||||||
|
|
||||||
|
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||||
|
# First ensure edu3 is a factor, then convert to numeric
|
||||||
|
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||||
|
data$edu_num <- as.numeric(data$edu3)
|
||||||
|
|
||||||
|
# Check the numeric conversion
|
||||||
|
print(table(data$edu_num, useNA = "ifany"))
|
||||||
|
|
||||||
|
# Verify the conversion
|
||||||
|
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||||
|
|
||||||
|
####correlation matrix ####
|
||||||
|
|
||||||
|
# Select numeric variables for correlation matrix
|
||||||
|
numeric_vars <- data %>%
|
||||||
|
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||||
|
|
||||||
|
# Create Spearman correlation matrix
|
||||||
|
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||||
|
|
||||||
|
# Print correlation matrix
|
||||||
|
print(round(cor_matrix, 3))
|
||||||
|
|
||||||
|
# Get significance tests for correlations using psych package
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Create correlation matrix with significance tests
|
||||||
|
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||||
|
|
||||||
|
# Print correlation matrix
|
||||||
|
print(round(cor_test$r, 3))
|
||||||
|
|
||||||
|
# Print p-values
|
||||||
|
print(round(cor_test$p, 3))
|
||||||
|
|
||||||
|
# Print all correlations with r and p values (for reporting)
|
||||||
|
for(i in 1:nrow(cor_test$r)) {
|
||||||
|
for(j in 1:ncol(cor_test$r)) {
|
||||||
|
if(i != j) { # Skip diagonal
|
||||||
|
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||||
|
": r =", round(cor_test$r[i, j], 3),
|
||||||
|
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Also print significant correlations summary
|
||||||
|
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||||
|
if(nrow(sig_cors) > 0) {
|
||||||
|
for(i in 1:nrow(sig_cors)) {
|
||||||
|
row_idx <- sig_cors[i, 1]
|
||||||
|
col_idx <- sig_cors[i, 2]
|
||||||
|
if(row_idx != col_idx) { # Skip diagonal
|
||||||
|
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||||
|
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||||
|
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Save correlation matrix and p-values to CSV files
|
||||||
|
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||||
|
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||||
31
eohi1/correlation_exp1.csv
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
Variable1,Variable2,Spearman_r,P_value
|
||||||
|
ehi_global_mean,AOT_total,0.084899905,0.005609163
|
||||||
|
ehi_life_mean,AOT_total,0.072949943,0.017368706
|
||||||
|
ehi_pref_mean,CRT_correct,0.068917794,0.024639893
|
||||||
|
ehi_pref_mean,CRT_int,-0.064728497,0.034848248
|
||||||
|
eohiDGEN_life,AOT_total,0.117946108,0.000116007
|
||||||
|
eohiDGEN_mean,AOT_total,0.111022973,0.000287049
|
||||||
|
eohiDGEN_mean,CRT_correct,0.066645932,0.029798224
|
||||||
|
eohiDGEN_pers,AOT_total,0.091596281,0.002797567
|
||||||
|
eohiDGEN_pref,AOT_total,0.068355715,0.025838342
|
||||||
|
eohiDGEN_val,AOT_total,0.103955004,0.000687468
|
||||||
|
eohiDGEN_val,CRT_correct,0.074789341,0.014729792
|
||||||
|
eohiDGEN_val,CRT_int,-0.06044435,0.048816455
|
||||||
|
eohiDGEN_pref,CRT_int,-0.058185337,0.057903079
|
||||||
|
ehi_global_mean,CRT_int,-0.057457963,0.061112
|
||||||
|
eohiDGEN_mean,CRT_int,-0.057307383,0.061794359
|
||||||
|
eohiDGEN_pref,CRT_correct,0.053610168,0.080621938
|
||||||
|
ehi_global_mean,CRT_correct,0.051329314,0.094394596
|
||||||
|
ehi_pers_mean,CRT_correct,0.051328061,0.094402649
|
||||||
|
ehi_pref_mean,AOT_total,0.05039887,0.100528033
|
||||||
|
ehi_pers_mean,CRT_int,-0.049058279,0.109918146
|
||||||
|
eohiDGEN_life,CRT_correct,0.039494214,0.198218491
|
||||||
|
ehi_val_mean,CRT_int,-0.037910532,0.216825816
|
||||||
|
ehi_pers_mean,AOT_total,0.031316388,0.307691267
|
||||||
|
eohiDGEN_pers,CRT_correct,0.025658263,0.403319679
|
||||||
|
eohiDGEN_pers,CRT_int,-0.017319242,0.572720797
|
||||||
|
ehi_life_mean,CRT_correct,0.016392301,0.593440889
|
||||||
|
ehi_val_mean,AOT_total,0.015193625,0.620731537
|
||||||
|
ehi_val_mean,CRT_correct,0.00599121,0.845308845
|
||||||
|
ehi_life_mean,CRT_int,-0.005811627,0.849889753
|
||||||
|
eohiDGEN_life,CRT_int,-0.001303724,0.966135022
|
||||||
|
BIN
eohi1/correlation_plot_scales_pearson.pdf
Normal file
BIN
eohi1/correlation_plot_scales_spearman.pdf
Normal file
81
eohi1/correlations - brier score x eohi and cal.r
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
# Load required libraries
|
||||||
|
library(Hmisc)
|
||||||
|
library(knitr)
|
||||||
|
library(dplyr)
|
||||||
|
library(corrr)
|
||||||
|
library(broom)
|
||||||
|
library(purrr)
|
||||||
|
library(tidyr)
|
||||||
|
library(tibble)
|
||||||
|
library(boot)
|
||||||
|
|
||||||
|
options(scipen = 999)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
# Load data
|
||||||
|
df1 <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
# Keep only required columns for the analysis
|
||||||
|
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||||
|
eohi_vars <- c(
|
||||||
|
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||||
|
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||||
|
)
|
||||||
|
cal_vars <- c("cal_selfActual","cal_global")
|
||||||
|
|
||||||
|
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||||
|
|
||||||
|
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||||
|
|
||||||
|
# Variables
|
||||||
|
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||||
|
eohi_vars <- c(
|
||||||
|
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||||
|
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||||
|
)
|
||||||
|
cal_vars <- c("cal_selfActual","cal_global")
|
||||||
|
|
||||||
|
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||||
|
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||||
|
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||||
|
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||||
|
xv <- df[[x]]; yv <- df[[y]]
|
||||||
|
ok <- is.finite(xv) & is.finite(yv)
|
||||||
|
if (sum(ok) < 3) {
|
||||||
|
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||||
|
}
|
||||||
|
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||||
|
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||||
|
})
|
||||||
|
dplyr::arrange(results, var_x, var_y)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute correlations (Spearman only)
|
||||||
|
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||||
|
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||||
|
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
|
||||||
|
|
||||||
|
# Wide r-only tables (optional)
|
||||||
|
to_wide <- function(d) {
|
||||||
|
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||||
|
}
|
||||||
|
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||||
|
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||||
|
wide_cal_eohi <- to_wide(corr_cal_eohi)
|
||||||
|
|
||||||
|
# Display
|
||||||
|
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||||
|
print(corr_bs_eohi)
|
||||||
|
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||||
|
print(corr_bs_cal)
|
||||||
|
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
|
||||||
|
print(corr_cal_eohi)
|
||||||
|
|
||||||
|
# Export a single CSV combining all sets
|
||||||
|
corr_bs_eohi$group <- "BS_vs_EOHI"
|
||||||
|
corr_bs_cal$group <- "BS_vs_Cal"
|
||||||
|
corr_cal_eohi$group <- "Cal_vs_EOHI"
|
||||||
|
|
||||||
|
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
|
||||||
|
dplyr::relocate(group, .before = var_x)
|
||||||
|
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||||
303
eohi1/correlations - eohi x calibration.r
Normal file
@ -0,0 +1,303 @@
|
|||||||
|
# Load required libraries
|
||||||
|
library(Hmisc)
|
||||||
|
library(knitr)
|
||||||
|
library(dplyr)
|
||||||
|
library(corrr)
|
||||||
|
library(broom)
|
||||||
|
library(purrr)
|
||||||
|
library(tidyr)
|
||||||
|
library(tibble)
|
||||||
|
library(boot)
|
||||||
|
|
||||||
|
options(scipen = 999)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
# Load data
|
||||||
|
df1 <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Remove columns with all NA values
|
||||||
|
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||||
|
|
||||||
|
# Select variables of interest
|
||||||
|
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
||||||
|
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
||||||
|
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
|
||||||
|
|
||||||
|
# Create dataset with selected variables
|
||||||
|
df <- df1[, c(eohi_vars, cal_vars)]
|
||||||
|
|
||||||
|
# Ensure all selected variables are numeric
|
||||||
|
df <- df %>%
|
||||||
|
mutate(across(everything(), as.numeric))
|
||||||
|
|
||||||
|
# Remove rows with any missing values for correlation analysis
|
||||||
|
df_complete <- df[complete.cases(df), ]
|
||||||
|
|
||||||
|
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
||||||
|
cat("Total sample size:", nrow(df), "\n")
|
||||||
|
|
||||||
|
str(df)
|
||||||
|
summary(df)
|
||||||
|
####==== DESCRIPTIVE STATISTICS ====
|
||||||
|
|
||||||
|
# Function to compute descriptive statistics
|
||||||
|
get_descriptives <- function(data, vars) {
|
||||||
|
desc_stats <- data %>%
|
||||||
|
select(all_of(vars)) %>%
|
||||||
|
summarise(across(everything(), list(
|
||||||
|
n = ~sum(!is.na(.)),
|
||||||
|
mean = ~mean(., na.rm = TRUE),
|
||||||
|
sd = ~sd(., na.rm = TRUE),
|
||||||
|
min = ~min(., na.rm = TRUE),
|
||||||
|
max = ~max(., na.rm = TRUE),
|
||||||
|
median = ~median(., na.rm = TRUE),
|
||||||
|
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
||||||
|
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
||||||
|
))) %>%
|
||||||
|
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
||||||
|
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
||||||
|
pivot_wider(names_from = stat, values_from = value) %>%
|
||||||
|
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
||||||
|
|
||||||
|
return(desc_stats)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get descriptives for EOHI variables
|
||||||
|
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
||||||
|
cat("\n=== EOHI Variables Descriptives ===\n")
|
||||||
|
print(eohi_descriptives)
|
||||||
|
|
||||||
|
# Get descriptives for calibration variables
|
||||||
|
cal_descriptives <- get_descriptives(df, cal_vars)
|
||||||
|
cat("\n=== Calibration Variables Descriptives ===\n")
|
||||||
|
print(cal_descriptives)
|
||||||
|
|
||||||
|
# Check for bimodal or unusual distributions
|
||||||
|
hist(df$eohi_pref)
|
||||||
|
hist(df$cal_selfActual)
|
||||||
|
|
||||||
|
# Look for extreme values
|
||||||
|
# boxplot(df$eohi_pref)
|
||||||
|
# boxplot(df$cal_selfActual)
|
||||||
|
|
||||||
|
# Test normality for each variable - probably unnecessary
|
||||||
|
library(nortest)
|
||||||
|
|
||||||
|
# Test EOHI variables
|
||||||
|
for(var in eohi_vars) {
|
||||||
|
cat("\n", var, "normality test:\n")
|
||||||
|
print(shapiro.test(df_complete[[var]]))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Test calibration variables
|
||||||
|
for(var in cal_vars) {
|
||||||
|
cat("\n", var, "normality test:\n")
|
||||||
|
print(shapiro.test(df_complete[[var]]))
|
||||||
|
}
|
||||||
|
|
||||||
|
####==== PEARSON CORRELATIONS ====
|
||||||
|
|
||||||
|
# Compute correlation matrix with p-values
|
||||||
|
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
||||||
|
|
||||||
|
# Extract correlation coefficients
|
||||||
|
cor_pearson <- cor_results_pearson$r
|
||||||
|
|
||||||
|
# Extract p-values
|
||||||
|
p_matrix_pearson <- cor_results_pearson$P
|
||||||
|
|
||||||
|
# Function to add significance stars
|
||||||
|
corstars <- function(cor_mat, p_mat) {
|
||||||
|
stars <- ifelse(p_mat < 0.001, "***",
|
||||||
|
ifelse(p_mat < 0.01, "**",
|
||||||
|
ifelse(p_mat < 0.05, "*", "")))
|
||||||
|
|
||||||
|
# Combine correlation values with stars, rounded to 5 decimal places
|
||||||
|
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
||||||
|
nrow = nrow(cor_mat))
|
||||||
|
|
||||||
|
# Set row and column names
|
||||||
|
rownames(cor_with_stars) <- rownames(cor_mat)
|
||||||
|
colnames(cor_with_stars) <- colnames(cor_mat)
|
||||||
|
|
||||||
|
return(cor_with_stars)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Apply the function
|
||||||
|
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
||||||
|
|
||||||
|
cat("\n=== PEARSON CORRELATIONS ===\n")
|
||||||
|
print(cor_table_pearson, quote = FALSE)
|
||||||
|
|
||||||
|
# Extract specific correlations between EOHI and calibration variables
|
||||||
|
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
||||||
|
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
||||||
|
|
||||||
|
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
||||||
|
for(i in 1:nrow(eohi_cal_correlations)) {
|
||||||
|
for(j in 1:ncol(eohi_cal_correlations)) {
|
||||||
|
cor_val <- eohi_cal_correlations[i, j]
|
||||||
|
p_val <- eohi_cal_pvalues[i, j]
|
||||||
|
star <- ifelse(p_val < 0.001, "***",
|
||||||
|
ifelse(p_val < 0.01, "**",
|
||||||
|
ifelse(p_val < 0.05, "*", "")))
|
||||||
|
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
||||||
|
rownames(eohi_cal_correlations)[i],
|
||||||
|
colnames(eohi_cal_correlations)[j],
|
||||||
|
cor_val, star, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
####==== SPEARMAN CORRELATIONS ====
|
||||||
|
|
||||||
|
# Compute Spearman correlation matrix with p-values
|
||||||
|
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
||||||
|
|
||||||
|
# Extract correlation coefficients
|
||||||
|
cor_spearman <- cor_results_spearman$r
|
||||||
|
|
||||||
|
# Extract p-values
|
||||||
|
p_matrix_spearman <- cor_results_spearman$P
|
||||||
|
|
||||||
|
# Apply the function
|
||||||
|
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
||||||
|
|
||||||
|
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
||||||
|
print(cor_table_spearman, quote = FALSE)
|
||||||
|
|
||||||
|
# Extract specific correlations between EOHI and calibration variables
|
||||||
|
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
||||||
|
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
||||||
|
|
||||||
|
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
||||||
|
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
||||||
|
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
||||||
|
cor_val <- eohi_cal_correlations_spearman[i, j]
|
||||||
|
p_val <- eohi_cal_pvalues_spearman[i, j]
|
||||||
|
star <- ifelse(p_val < 0.001, "***",
|
||||||
|
ifelse(p_val < 0.01, "**",
|
||||||
|
ifelse(p_val < 0.05, "*", "")))
|
||||||
|
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
||||||
|
rownames(eohi_cal_correlations_spearman)[i],
|
||||||
|
colnames(eohi_cal_correlations_spearman)[j],
|
||||||
|
cor_val, star, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
||||||
|
|
||||||
|
# Function to compute correlation with bootstrap CI
|
||||||
|
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
||||||
|
# Remove missing values
|
||||||
|
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
||||||
|
|
||||||
|
if(nrow(complete_data) < 3) {
|
||||||
|
return(data.frame(
|
||||||
|
correlation = NA,
|
||||||
|
ci_lower = NA,
|
||||||
|
ci_upper = NA,
|
||||||
|
n = nrow(complete_data)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Bootstrap function
|
||||||
|
boot_fun <- function(data, indices) {
|
||||||
|
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Perform bootstrap
|
||||||
|
set.seed(123) # for reproducibility
|
||||||
|
boot_results <- boot(complete_data, boot_fun, R = R)
|
||||||
|
|
||||||
|
# Calculate confidence interval
|
||||||
|
ci <- boot.ci(boot_results, type = "perc")
|
||||||
|
|
||||||
|
return(data.frame(
|
||||||
|
correlation = boot_results$t0,
|
||||||
|
ci_lower = ci$perc[4],
|
||||||
|
ci_upper = ci$perc[5],
|
||||||
|
n = nrow(complete_data)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
||||||
|
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
||||||
|
bootstrap_results_pearson <- expand.grid(
|
||||||
|
eohi_var = eohi_vars,
|
||||||
|
cal_var = cal_vars,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
) %>%
|
||||||
|
pmap_dfr(function(eohi_var, cal_var) {
|
||||||
|
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
||||||
|
result$eohi_var <- eohi_var
|
||||||
|
result$cal_var <- cal_var
|
||||||
|
result$method <- "pearson"
|
||||||
|
return(result)
|
||||||
|
}) %>%
|
||||||
|
mutate(
|
||||||
|
correlation = round(correlation, 5),
|
||||||
|
ci_lower = round(ci_lower, 5),
|
||||||
|
ci_upper = round(ci_upper, 5)
|
||||||
|
)
|
||||||
|
|
||||||
|
print(bootstrap_results_pearson)
|
||||||
|
|
||||||
|
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
||||||
|
bootstrap_results_spearman <- expand.grid(
|
||||||
|
eohi_var = eohi_vars,
|
||||||
|
cal_var = cal_vars,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
) %>%
|
||||||
|
pmap_dfr(function(eohi_var, cal_var) {
|
||||||
|
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
||||||
|
result$eohi_var <- eohi_var
|
||||||
|
result$cal_var <- cal_var
|
||||||
|
result$method <- "spearman"
|
||||||
|
return(result)
|
||||||
|
}) %>%
|
||||||
|
mutate(
|
||||||
|
correlation = round(correlation, 5),
|
||||||
|
ci_lower = round(ci_lower, 5),
|
||||||
|
ci_upper = round(ci_upper, 5)
|
||||||
|
)
|
||||||
|
|
||||||
|
print(bootstrap_results_spearman)
|
||||||
|
|
||||||
|
####==== SUMMARY TABLE ====
|
||||||
|
|
||||||
|
# Create comprehensive summary table
|
||||||
|
summary_table <- bootstrap_results_pearson %>%
|
||||||
|
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
||||||
|
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
||||||
|
left_join(
|
||||||
|
bootstrap_results_spearman %>%
|
||||||
|
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
||||||
|
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
||||||
|
by = c("eohi_var", "cal_var")
|
||||||
|
) %>%
|
||||||
|
# Add p-values
|
||||||
|
left_join(
|
||||||
|
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
||||||
|
pmap_dfr(function(eohi_var, cal_var) {
|
||||||
|
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
||||||
|
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
||||||
|
data.frame(
|
||||||
|
eohi_var = eohi_var,
|
||||||
|
cal_var = cal_var,
|
||||||
|
pearson_p = pearson_p,
|
||||||
|
spearman_p = spearman_p
|
||||||
|
)
|
||||||
|
}),
|
||||||
|
by = c("eohi_var", "cal_var")
|
||||||
|
) %>%
|
||||||
|
mutate(
|
||||||
|
pearson_p = round(pearson_p, 5),
|
||||||
|
spearman_p = round(spearman_p, 5)
|
||||||
|
)
|
||||||
|
|
||||||
|
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
||||||
|
print(summary_table)
|
||||||
|
|
||||||
|
# Save results to CSV
|
||||||
|
# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
||||||
|
# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||||
171
eohi1/correlations - scales.r
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(corrplot)
|
||||||
|
library(Hmisc)
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp1_data <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
# Define the two sets of variables
|
||||||
|
set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||||
|
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean")
|
||||||
|
set2_vars <- c("AOT_total", "CRT_correct", "CRT_int")
|
||||||
|
|
||||||
|
# Create subset with only the variables of interest
|
||||||
|
correlation_data <- exp1_data[, c(set1_vars, set2_vars)]
|
||||||
|
|
||||||
|
# ===== NORMALITY CHECKS =====
|
||||||
|
# Shapiro-Wilk tests for normality
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
shapiro_result <- shapiro.test(correlation_data[[var]])
|
||||||
|
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
|
||||||
|
var, shapiro_result$p.value,
|
||||||
|
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Visual normality checks
|
||||||
|
pdf("normality_plots.pdf", width = 12, height = 8)
|
||||||
|
par(mfrow = c(2, 4))
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
# Histogram with normal curve overlay
|
||||||
|
hist(correlation_data[[var]], main = paste("Histogram:", var),
|
||||||
|
xlab = var, freq = FALSE)
|
||||||
|
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE)),
|
||||||
|
add = TRUE, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Q-Q plot
|
||||||
|
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
|
||||||
|
qqline(correlation_data[[var]], col = "red", lwd = 2)
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# ===== LINEARITY CHECKS =====
|
||||||
|
# Check linearity between variable pairs
|
||||||
|
pdf("linearity_plots.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(3, 5))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
# Scatter plot with regression line
|
||||||
|
plot(correlation_data[[var1]], correlation_data[[var2]],
|
||||||
|
main = paste(var1, "vs", var2),
|
||||||
|
xlab = var1, ylab = var2, pch = 16, cex = 0.6)
|
||||||
|
|
||||||
|
# Add linear regression line
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
abline(lm_fit, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add LOESS smooth line for non-linear pattern detection
|
||||||
|
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
|
||||||
|
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
|
||||||
|
loess_pred <- predict(loess_fit, x_seq)
|
||||||
|
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
|
||||||
|
|
||||||
|
# Calculate R-squared for linear fit
|
||||||
|
r_squared <- summary(lm_fit)$r.squared
|
||||||
|
cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Residual analysis for linearity
|
||||||
|
pdf("residual_plots.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(3, 5))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
residuals <- residuals(lm_fit)
|
||||||
|
fitted <- fitted(lm_fit)
|
||||||
|
|
||||||
|
plot(fitted, residuals,
|
||||||
|
main = paste("Residuals:", var1, "vs", var2),
|
||||||
|
xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
|
||||||
|
abline(h = 0, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add smooth line to residuals
|
||||||
|
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate correlation matrix (Spearman only)
|
||||||
|
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
|
||||||
|
|
||||||
|
# Print correlation matrix with 5 decimal places
|
||||||
|
print(round(cor_matrix_spearman, 5))
|
||||||
|
|
||||||
|
# Separate correlations between the two sets (Spearman)
|
||||||
|
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
|
||||||
|
print(round(set1_set2_cor, 5))
|
||||||
|
|
||||||
|
# Calculate correlations within each set (Spearman)
|
||||||
|
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
|
||||||
|
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
|
||||||
|
|
||||||
|
# Statistical significance tests (Spearman)
|
||||||
|
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
|
||||||
|
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create correlation plot (Spearman only)
|
||||||
|
pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_spearman, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Summary statistics
|
||||||
|
desc_stats <- describe(correlation_data)
|
||||||
|
print(round(desc_stats, 5))
|
||||||
|
|
||||||
|
# Save results to CSV files
|
||||||
|
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv")
|
||||||
|
write.csv(round(desc_stats, 5), "descriptive_statistics.csv")
|
||||||
|
|
||||||
|
# Save correlation results in a formatted table
|
||||||
|
cor_results <- data.frame(
|
||||||
|
Variable1 = character(),
|
||||||
|
Variable2 = character(),
|
||||||
|
Spearman_r = numeric(),
|
||||||
|
P_value = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract significant correlations between sets
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
r_val <- cor_matrix_spearman[var1, var2]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
|
||||||
|
cor_results <- rbind(cor_results, data.frame(
|
||||||
|
Variable1 = var1,
|
||||||
|
Variable2 = var2,
|
||||||
|
Spearman_r = r_val,
|
||||||
|
P_value = p_val,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE)
|
||||||
45
eohi1/dataP 02 - cor means average over time frames.r
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp1_data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Define all NPastDiff and NFutDiff variables
|
||||||
|
all_diff_vars <- c(
|
||||||
|
"NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
|
||||||
|
"NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
|
||||||
|
"NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
|
||||||
|
"NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change",
|
||||||
|
"NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel",
|
||||||
|
"NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex",
|
||||||
|
"NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice",
|
||||||
|
"NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define DGEN variables to average
|
||||||
|
dgen_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
|
||||||
|
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
|
||||||
|
|
||||||
|
# Calculate domain_mean as average of all 40 variables
|
||||||
|
exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Calculate DGEN_mean as average of all 8 DGEN variables
|
||||||
|
exp1_data$DGEN_mean <- rowMeans(exp1_data[, dgen_vars], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Save the updated data
|
||||||
|
write.csv(exp1_data, "exp1.csv", row.names = FALSE)
|
||||||
|
|
||||||
|
# Display summary of the calculated means
|
||||||
|
cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n")
|
||||||
|
summary(exp1_data$domain_mean)
|
||||||
|
|
||||||
|
cat("\nDGEN mean summary (average of all 8 DGEN variables):\n")
|
||||||
|
summary(exp1_data$DGEN_mean)
|
||||||
|
|
||||||
|
# Show first few rows to verify calculations
|
||||||
|
cat("\nFirst 5 rows of calculated means:\n")
|
||||||
|
print(exp1_data[1:5, c("domain_mean", "DGEN_mean")])
|
||||||
|
|
||||||
|
# Show the individual DGEN values for first 5 rows to verify math
|
||||||
|
cat("\nFirst 5 rows of individual DGEN values for verification:\n")
|
||||||
|
print(exp1_data[1:5, dgen_vars])
|
||||||
104
eohi1/datap 03 - CORRECT domain specific EHI vars.r
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load data
ehi1 <- read.csv("ehi1.csv")

# Create EHI difference variables (NPast - NFut) for every item.
# The item suffixes are listed exactly once so that the creation code and
# the QA code below cannot drift out of sync (the original spelled each of
# the 20 items out twice: once for the subtraction, once in qa_pairs).
item_suffixes <- c(
  # Preferences
  "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
  # Personality
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  # Values
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
  # Life satisfaction
  "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"
)

# ehi_<item> = NPastDiff_<item> - NFutDiff_<item>
for (sfx in item_suffixes) {
  npast_col <- paste0("NPastDiff_", sfx)
  nfut_col  <- paste0("NFutDiff_", sfx)
  ehi1[[paste0("ehi_", sfx)]] <- ehi1[[npast_col]] - ehi1[[nfut_col]]
}

# QA: Verify calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI difference calculations (NPast - NFut)\n\n")

all_checks_passed <- TRUE

for (sfx in item_suffixes) {
  npast  <- paste0("NPastDiff_", sfx)
  nfut   <- paste0("NFutDiff_", sfx)
  target <- paste0("ehi_", sfx)

  # Recompute the expected difference independently of the stored column
  expected_diff <- ehi1[[npast]] - ehi1[[nfut]]

  # Get actual value in target variable
  actual_value <- ehi1[[target]]

  # Compare (allowing for floating point precision issues)
  discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", target))
    cat(sprintf("  Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))

    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf("  Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
                row_num,
                npast, ehi1[[npast]][row_num],
                nfut, ehi1[[nfut]][row_num],
                expected_diff[row_num],
                target, actual_value[row_num]))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", target, nrow(ehi1)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset
write.csv(ehi1, "ehi1.csv", row.names = FALSE)
cat("\nDataset saved to ehi1.csv\n")
|
||||||
167
eohi1/datap 04 - CORRECT ehi var means.r
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load data
ehi1 <- read.csv("ehi1.csv")

# Single source of truth for the EHI item groupings. The same lists drive
# the mean calculation, the first-5-rows spot checks, and the all-rows QA
# check below (the original repeated each variable list three times, which
# invited drift between the calculation and its verification).
ehi_groups <- list(
  "Preferences" = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv",
                    "ehi_pref_nap", "ehi_pref_travel"),
  "Personality" = c("ehi_pers_extravert", "ehi_pers_critical", "ehi_pers_dependable",
                    "ehi_pers_anxious", "ehi_pers_complex"),
  "Values" = c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion",
               "ehi_val_performance", "ehi_val_justice"),
  "Life Satisfaction" = c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied",
                          "ehi_life_important", "ehi_life_change")
)
# Global group = all 20 items across the four domains
ehi_groups[["Global"]] <- unname(unlist(ehi_groups))

# Output column written for each group's mean
group_targets <- c(
  "Preferences"       = "ehi_pref_mean",
  "Personality"       = "ehi_pers_mean",
  "Values"            = "ehi_val_mean",
  "Life Satisfaction" = "ehi_life_mean",
  "Global"            = "ehi_global_mean"
)

# Calculate mean scores for EHI variables (row means, NAs ignored)
for (g in names(ehi_groups)) {
  ehi1[[group_targets[[g]]]] <- rowMeans(ehi1[, ehi_groups[[g]]], na.rm = TRUE)
}

# QA: Verify mean calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI mean calculations\n\n")

# Spot-check the first 5 rows of every group mean against an independent
# recomputation with mean().
first_group <- TRUE
for (g in names(ehi_groups)) {
  vars_g <- ehi_groups[[g]]
  header <- if (g == "Global") "GLOBAL MEAN (20 variables)" else paste(toupper(g), "MEAN")
  cat(sprintf("%s--- FIRST 5 ROWS: %s ---\n", if (first_group) "" else "\n", header))
  first_group <- FALSE

  for (i in 1:5) {
    vals <- as.numeric(ehi1[i, vars_g])
    calc_mean <- mean(vals, na.rm = TRUE)
    actual_mean <- ehi1[[group_targets[[g]]]][i]
    match <- abs(calc_mean - actual_mean) < 1e-10
    if (g == "Global") {
      # Too many values to list individually for the 20-item global mean
      cat(sprintf("Row %d: 20 values → Calculated: %.5f | Actual: %.5f %s\n",
                  i, calc_mean, actual_mean, ifelse(match, "✓", "✗")))
    } else {
      cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
                  i, vals[1], vals[2], vals[3], vals[4], vals[5],
                  calc_mean, actual_mean, ifelse(match, "✓", "✗")))
    }
  }
}

# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

all_checks_passed <- TRUE

for (g in names(ehi_groups)) {
  calc_mean <- rowMeans(ehi1[, ehi_groups[[g]]], na.rm = TRUE)
  actual_mean <- ehi1[[group_targets[[g]]]]
  # Allow for floating point precision issues
  discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s mean (n_vars = %d)\n", g, length(ehi_groups[[g]])))
    cat(sprintf("  Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                g, length(ehi_groups[[g]]), nrow(ehi1)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset
write.csv(ehi1, "ehi1.csv", row.names = FALSE)
cat("\nDataset saved to ehi1.csv\n")
|
||||||
38
eohi1/datap 15 - education recoded 3 ordinal levels.r
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

data <- read.csv("ehi1.csv")

# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))

# Also show the unique values and their frequencies.
# BUG FIX: the original used print("\nUnique values and frequencies:"),
# which renders the escape literally ([1] "\nUnique ..."); cat() emits the
# intended blank line instead.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))

# Recode demo_edu into 3 ordinal levels; anything not matched below stays NA
data$edu3 <- NA

# HS_TS: High School and Trade School
data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS"

# C_Ug: College and University - Undergraduate
data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug"

# grad_prof: University - Graduate, University - PhD, and Professional Degree
data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof"

# Convert to ordered factor (HS_TS < C_Ug < grad_prof)
data$edu3 <- factor(data$edu3,
                    levels = c("HS_TS", "C_Ug", "grad_prof"),
                    ordered = TRUE)

# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))

# Verify the recoding: cross-tab of original vs recoded categories
print(table(data$demo_edu, data$edu3, useNA = "ifany"))

# Save the updated dataset with the new edu3 variable
write.csv(data, "ehi1.csv", row.names = FALSE)
|
||||||
14
eohi1/descriptive_statistics.csv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
|
||||||
|
"eohiDGEN_pref",1,1063,0.31138,2.29712,0,0.27497,1.4826,-10,10,20,0.16174,2.49869,0.07046
|
||||||
|
"eohiDGEN_pers",2,1063,0.44309,2.40063,0,0.39365,1.4826,-10,10,20,0.04618,2.20622,0.07363
|
||||||
|
"eohiDGEN_val",3,1063,0.49012,2.38159,0,0.3772,1.4826,-10,10,20,0.37348,2.6545,0.07305
|
||||||
|
"eohiDGEN_life",4,1063,0.41016,2.72624,0,0.3866,1.4826,-10,10,20,0.06705,1.55144,0.08362
|
||||||
|
"eohiDGEN_mean",5,1063,0.41488,1.7406,0,0.34323,0.99334,-9,9.33,18.33,0.43898,2.96172,0.05339
|
||||||
|
"ehi_pref_mean",6,1063,0.13885,0.64266,0,0.09683,0.29652,-2.2,3.2,5.4,0.93756,3.43951,0.01971
|
||||||
|
"ehi_pers_mean",7,1063,0.09614,0.71979,0,0.07027,0.59304,-3.6,3.8,7.4,0.53524,3.53968,0.02208
|
||||||
|
"ehi_val_mean",8,1063,0.19699,0.72241,0,0.14266,0.29652,-3.4,4.4,7.8,1.17086,5.08196,0.02216
|
||||||
|
"ehi_life_mean",9,1063,0.1127,1.12639,0,0.06792,0.59304,-4.8,6,10.8,0.45879,2.97822,0.03455
|
||||||
|
"ehi_global_mean",10,1063,0.13617,0.50405,0.05,0.09871,0.37065,-1.65,2.65,4.3,1.09805,3.7645,0.01546
|
||||||
|
"AOT_total",11,1063,0.6945,0.60845,0.75,0.69859,0.7413,-1.125,2,3.125,-0.09166,-0.61789,0.01866
|
||||||
|
"CRT_correct",12,1063,0.3132,0.36479,0.33,0.26666,0.48926,0,1,1,0.79518,-0.77639,0.01119
|
||||||
|
"CRT_int",13,1063,0.614,0.36086,0.67,0.6424,0.48926,0,1,1,-0.45012,-1.11195,0.01107
|
||||||
|
107
eohi1/descriptives - gen knowledge questions.r
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
library(tidyverse)
library(ggplot2)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read data
data <- read.csv("exp1.csv")

# Select variables ending exactly with _T or _F (general-knowledge items)
df <- data %>% select(matches("(_T|_F)$"))

# Remove demo_f variable (if present)
df <- df %>% select(-any_of("demo_f"))

str(df)

# Coerce to numeric where possible (without breaking non-numeric)
df_num <- df %>%
  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))

# Expected accuracy band for each nominal difficulty level.
# Hoisted out of the per-column function below: it is loop-invariant, so
# there is no reason to rebuild the list for every variable.
expected_ranges <- list(
  "15" = c(0.15, 0.25),
  "35" = c(0.35, 0.45),
  "55" = c(0.55, 0.65),
  "75" = c(0.75, 0.85)
)

# Compute count and proportion correct per variable
descriptives <- purrr::imap_dfr(df_num, function(col, name) {
  x <- suppressWarnings(as.numeric(col))
  x <- x[!is.na(x)]
  n_total <- length(x)
  n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1)
  prop <- if (n_total == 0) NA_real_ else n_correct / n_total

  # Extract difficulty number from the variable name (e.g. "gest_75_T" -> 75).
  # suppressWarnings: a non-matching name leaves the full string, and
  # as.numeric then yields NA without a coercion warning.
  difficulty_num <- suppressWarnings(as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)))

  # BUG FIX: if the column is all-NA, prop is NA_real_ and the original
  # `if (prop >= expected_range[1] && ...)` aborted with
  # "missing value where TRUE/FALSE needed". Guard on is.na(prop) first.
  if (is.na(prop) || !(as.character(difficulty_num) %in% names(expected_ranges))) {
    match_difficulty <- "UNKNOWN"
  } else {
    expected_range <- expected_ranges[[as.character(difficulty_num)]]
    match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO"
  }

  tibble(
    variable = name,
    n_total = n_total,
    n_correct = n_correct,
    prop_correct = round(prop, 5),
    match_difficulty = match_difficulty
  )
}) %>%
  arrange(variable)

# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin
bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09))
bin_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(0.10, 1.00, by = 0.10),
  right = FALSE,
  include.lowest = FALSE,
  labels = bin_levels
)
bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")

# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94
bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09))
bin15_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(0.15, 0.95, by = 0.10),
  right = FALSE,
  include.lowest = FALSE,
  labels = bin15_levels
)
bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")

# View
print(descriptives, n = Inf)
cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n")
print(bin_counts, n = Inf)
cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n")
print(bin15_counts, n = Inf)

# Histogram of proportion correct with custom bins
histogram <- ggplot(descriptives, aes(x = prop_correct)) +
  geom_histogram(
    breaks = seq(0.15, 0.95, by = 0.10),
    fill = "lightblue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Distribution of Proportion Correct",
    x = "Proportion Correct",
    y = "Number of Variables"
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = seq(0.15, 0.95, by = 0.10))

print(histogram)

# Optionally save
# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")
|
||||||
88
eohi1/e1 - reliability ehi.r
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
# Reliability analysis of the two EOHI summary scores (eohiDGEN_mean and
# ehi_global_mean): Cronbach's alpha, split-half reliability, and a
# correlation-based two-item summary, written out as an HTML report.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

options(scipen = 999)

df <- read.csv("ehi1.csv")
library(psych)
library(knitr)

# fixed-decimal formatter (five decimals)
fmt5 <- function(x) formatC(x, format = "f", digits = 5)

# Select the two variables for reliability analysis, keeping only rows
# complete on both (note: despite the original "4 variables" comment,
# only these two columns are used).
reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]),
                       c("eohiDGEN_mean", "ehi_global_mean")]

# Cronbach's Alpha (psych::alpha; with k = 2 items this equals the
# Spearman-Brown value computed explicitly below)
alpha_result <- alpha(reliability_data)

# Split-half reliability (psych::splitHalf)
split_half <- splitHalf(reliability_data)

# Inter-item correlations
cor_matrix <- cor(reliability_data, use = "complete.obs")

# Two-item reliability summary (if applicable)
two_item_section <- ""
if (ncol(reliability_data) == 2) {
  n_complete <- sum(complete.cases(reliability_data))
  r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson")
  r_spearman <- suppressWarnings(cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "spearman"))
  # Fisher z CI for r: z = atanh(r), SE = 1/sqrt(n-3), back-transform with tanh
  fisher_z <- atanh(r_pearson)
  se_z <- 1 / sqrt(n_complete - 3)
  z_crit <- qnorm(0.975)
  ci_z_lower <- fisher_z - z_crit * se_z
  ci_z_upper <- fisher_z + z_crit * se_z
  ci_r_lower <- tanh(ci_z_lower)
  ci_r_upper <- tanh(ci_z_upper)
  # Approximate Fisher z CI for Spearman rho (large-sample approximation;
  # reuses the Pearson SE, which is only an approximation for rho)
  fisher_z_s <- atanh(r_spearman)
  ci_zs_lower <- fisher_z_s - z_crit * se_z
  ci_zs_upper <- fisher_z_s + z_crit * se_z
  ci_s_lower <- tanh(ci_zs_lower)
  ci_s_upper <- tanh(ci_zs_upper)
  # Spearman–Brown/Cronbach's alpha for k = 2: alpha = 2r / (1 + r);
  # the CI endpoints are transformed through the same formula
  alpha_sb <- (2 * r_pearson) / (1 + r_pearson)
  alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower)
  alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper)

  # intToUtf8(945) is the Greek letter alpha for the HTML output
  two_item_section <- paste0(
    "<h2>Two-Item Reliability Summary</h2>",
    "<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>",
    "<p>Spearman r: ", fmt5(r_spearman), " (95% CI: [", fmt5(ci_s_lower), ", ", fmt5(ci_s_upper), "])</p>",
    "<p>Spearman–Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb),
    " (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>"
  )
}

# Create a summary table of per-variable descriptives.
# NOTE(review): summary_table (and cor_matrix above) are computed but never
# included in html_output below — confirm whether they should be added to
# the report or removed.
summary_table <- data.frame(
  Variable = names(reliability_data),
  n = nrow(reliability_data),
  Mean = round(colMeans(reliability_data, na.rm = TRUE), 5),
  SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5),
  Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5),
  Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5),
  Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5)
)

# Assemble the HTML report from the pieces computed above
html_output <- paste0(
  "<html><head><title>EHI Reliability Analysis</title></head><body>",
  "<h1>EHI Reliability Analysis</h1>",
  two_item_section,

  "<h2>Cronbach's Alpha</h2>",
  kable(alpha_result$total, format = "html"),

  "<h2>Split-Half Reliability</h2>",
  "<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>",

  "<h2>Item-Level Statistics</h2>",
  kable(alpha_result$item.stats, format = "html"),

  "</body></html>"
)

# Write the report next to the data
writeLines(html_output, "EHI reliability.html")
|
||||||
BIN
eohi1/education_DGEN_means.png
Normal file
|
After Width: | Height: | Size: 177 KiB |
BIN
eohi1/education_domain_means.png
Normal file
|
After Width: | Height: | Size: 176 KiB |
1064
eohi1/ehi1 - Copy.csv
Normal file
1064
eohi1/ehi1.csv
Normal file
81
eohi1/eohi_calibration_correlations_summary.csv
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
eohi_var,cal_var,n,pearson_r,pearson_ci_lower,pearson_ci_upper,pearson_p,spearman_rho,spearman_ci_lower,spearman_ci_upper,spearman_p
|
||||||
|
eohi_mean,cal_selfActual,1063,-0.09447,-0.15019,-0.0362,0.00205,-0.11288,-0.1699,-0.05351,0.00023
|
||||||
|
eohi_val,cal_selfActual,1063,-0.09384,-0.15347,-0.03616,0.00219,-0.09963,-0.16267,-0.04046,0.00114
|
||||||
|
eohi_pers,cal_selfActual,1063,-0.07652,-0.12898,-0.01613,0.01258,-0.08281,-0.14357,-0.02535,0.00691
|
||||||
|
eohiDGEN_pref,cal_15,1063,0.11449,0.04892,0.17239,0.00018,0.08071,0.01776,0.13995,0.00847
|
||||||
|
eohi_pers,cal_55,1063,-0.06004,-0.1185,-0.0065,0.05036,-0.07699,-0.13816,-0.01519,0.01204
|
||||||
|
eohi_mean,cal_55,1063,-0.06632,-0.11782,-0.01552,0.03061,-0.07588,-0.13634,-0.01868,0.01333
|
||||||
|
eohiDGEN_val,cal_55,1063,-0.05006,-0.1073,0.00598,0.10281,-0.06964,-0.12753,-0.01077,0.02318
|
||||||
|
eohiDGEN_mean,cal_true,1063,0.05754,-0.0009,0.11652,0.06074,0.0676,0.00725,0.12602,0.02754
|
||||||
|
eohiDGEN_life,cal_75,1063,-0.04564,-0.10516,0.01314,0.13702,-0.05638,-0.11573,0.00538,0.06615
|
||||||
|
eohi_life,cal_75,1063,-0.03875,-0.09577,0.01879,0.20683,-0.05565,-0.11564,0.00491,0.06974
|
||||||
|
eohiDGEN_mean,cal_15,1063,0.07628,0.01598,0.13615,0.01285,0.05395,-0.00598,0.11493,0.0787
|
||||||
|
eohi_val,cal_55,1063,-0.05666,-0.10898,-0.00182,0.06479,-0.05346,-0.10885,0.00265,0.0815
|
||||||
|
eohiDGEN_life,cal_55,1063,0.05044,-0.00773,0.10614,0.10026,0.04956,-0.00878,0.11087,0.10635
|
||||||
|
eohiDGEN_pref,cal_true,1063,0.07292,0.0095,0.134,0.01741,0.04803,-0.0094,0.108,0.11761
|
||||||
|
eohi_pref,cal_selfActual,1063,-0.03281,-0.09762,0.032,0.28519,-0.04423,-0.10373,0.01867,0.14958
|
||||||
|
eohiDGEN_val,cal_false,1063,-0.02787,-0.09239,0.03411,0.36398,-0.04393,-0.10451,0.01416,0.15236
|
||||||
|
eohiDGEN_pers,cal_15,1063,0.04714,-0.01715,0.10944,0.12451,0.04308,-0.01746,0.10317,0.16046
|
||||||
|
eohiDGEN_life,cal_false,1063,0.03476,-0.02508,0.09492,0.25753,0.04047,-0.02102,0.10411,0.18735
|
||||||
|
eohi_life,cal_true,1063,-0.02269,-0.08192,0.0416,0.45982,-0.03779,-0.0985,0.01964,0.21833
|
||||||
|
eohiDGEN_life,cal_15,1063,0.05091,-0.01796,0.11623,0.09709,0.03755,-0.02524,0.09835,0.22121
|
||||||
|
eohiDGEN_life,cal_true,1063,-0.00526,-0.06612,0.05833,0.86395,-0.03744,-0.09633,0.02643,0.22263
|
||||||
|
eohiDGEN_life,cal_selfActual,1063,0.02779,-0.03579,0.0868,0.36538,0.03688,-0.02625,0.10029,0.22962
|
||||||
|
eohi_val,cal_75,1063,-0.03432,-0.09303,0.02511,0.26362,-0.03548,-0.09664,0.02788,0.24775
|
||||||
|
eohiDGEN_val,cal_selfActual,1063,0.02114,-0.04078,0.08546,0.49105,0.03547,-0.02636,0.09663,0.24789
|
||||||
|
eohiDGEN_pers,cal_true,1063,0.02354,-0.03617,0.08422,0.44334,0.03532,-0.02544,0.09158,0.24994
|
||||||
|
eohiDGEN_pers,cal_35,1063,-0.03495,-0.09252,0.02312,0.25493,-0.03489,-0.09183,0.02535,0.25578
|
||||||
|
eohiDGEN_pers,cal_75,1063,-0.01611,-0.07516,0.04168,0.59971,-0.03379,-0.08928,0.02687,0.271
|
||||||
|
eohiDGEN_pers,cal_false,1063,-0.02503,-0.08151,0.03334,0.4149,-0.03266,-0.08708,0.02624,0.28745
|
||||||
|
eohiDGEN_val,cal_global,1063,-0.00447,-0.05803,0.05482,0.8843,-0.03204,-0.08825,0.0252,0.29666
|
||||||
|
eohiDGEN_mean,cal_55,1063,-0.03699,-0.08943,0.01668,0.2282,-0.02929,-0.08473,0.03097,0.34006
|
||||||
|
eohiDGEN_pers,cal_selfActual,1063,-0.03827,-0.10619,0.02517,0.21254,-0.02835,-0.09145,0.03191,0.35572
|
||||||
|
eohi_life,cal_selfActual,1063,0.04737,-0.01652,0.10847,0.12274,0.02825,-0.03176,0.08666,0.35753
|
||||||
|
eohi_mean,cal_75,1063,-0.01187,-0.06715,0.04855,0.69916,-0.0275,-0.0859,0.03279,0.37045
|
||||||
|
eohiDGEN_val,cal_15,1063,0.00914,-0.04676,0.06618,0.76607,-0.02707,-0.08456,0.02837,0.37789
|
||||||
|
eohi_pref,cal_55,1063,-0.02632,-0.08106,0.02643,0.39136,-0.02524,-0.08705,0.03558,0.41101
|
||||||
|
eohiDGEN_pref,cal_35,1063,-0.00852,-0.0681,0.0469,0.78147,-0.02501,-0.08136,0.03028,0.41525
|
||||||
|
eohi_val,cal_global,1063,-0.03245,-0.08628,0.02097,0.29049,-0.02445,-0.0837,0.03405,0.42589
|
||||||
|
eohiDGEN_pref,cal_global,1063,0.05135,-0.00854,0.10781,0.09423,0.02438,-0.03698,0.07747,0.42716
|
||||||
|
eohi_mean,cal_global,1063,-0.00987,-0.0669,0.04625,0.74782,-0.02423,-0.08093,0.03538,0.43002
|
||||||
|
eohi_mean,cal_false,1063,0.00103,-0.05609,0.05796,0.97329,-0.02413,-0.08528,0.034,0.43184
|
||||||
|
eohi_pers,cal_15,1063,0.0424,-0.01973,0.10006,0.16717,0.02377,-0.03499,0.07967,0.4388
|
||||||
|
eohiDGEN_pref,cal_75,1063,0.04194,-0.01865,0.10462,0.17182,0.02253,-0.03687,0.08394,0.46315
|
||||||
|
eohi_val,cal_35,1063,0.00919,-0.04488,0.06395,0.76478,-0.02132,-0.07724,0.0359,0.48748
|
||||||
|
eohiDGEN_mean,cal_75,1063,0.02182,-0.03779,0.08058,0.47734,0.02086,-0.0387,0.08009,0.49684
|
||||||
|
eohi_val,cal_15,1063,-0.0114,-0.06675,0.04342,0.71036,0.02045,-0.0384,0.08265,0.50533
|
||||||
|
eohiDGEN_mean,cal_global,1063,0.01728,-0.04015,0.07341,0.5736,0.01974,-0.03874,0.07654,0.52029
|
||||||
|
eohiDGEN_val,cal_true,1063,0.03211,-0.02651,0.08716,0.29554,0.01936,-0.04555,0.08179,0.52834
|
||||||
|
eohi_mean,cal_15,1063,0.02437,-0.03634,0.08546,0.42732,0.01772,-0.03986,0.07648,0.56377
|
||||||
|
eohiDGEN_pers,cal_55,1063,-0.02109,-0.07397,0.03478,0.49211,-0.0175,-0.0759,0.04546,0.56869
|
||||||
|
eohiDGEN_mean,cal_selfActual,1063,0.00131,-0.05794,0.06413,0.96594,0.0169,-0.04525,0.07532,0.582
|
||||||
|
eohi_life,cal_false,1063,0.02929,-0.02351,0.0794,0.34004,0.01689,-0.04339,0.07467,0.58238
|
||||||
|
eohiDGEN_mean,cal_false,1063,-0.02032,-0.07402,0.03873,0.5082,-0.01576,-0.07583,0.04145,0.60773
|
||||||
|
eohiDGEN_pref,cal_55,1063,-0.01025,-0.064,0.04642,0.73848,-0.01542,-0.0708,0.046,0.61559
|
||||||
|
eohi_life,cal_35,1063,0.03599,-0.02203,0.08892,0.24108,0.01538,-0.04474,0.07414,0.61636
|
||||||
|
eohi_pers,cal_false,1063,-0.00935,-0.06513,0.04644,0.76086,-0.01485,-0.07336,0.04336,0.62859
|
||||||
|
eohi_life,cal_15,1063,0.0156,-0.0452,0.07483,0.61145,0.01432,-0.04315,0.07438,0.64092
|
||||||
|
eohi_pers,cal_true,1063,0.0055,-0.05876,0.07464,0.85794,-0.01291,-0.06976,0.04539,0.67419
|
||||||
|
eohi_pref,cal_75,1063,0.02725,-0.03153,0.08793,0.37474,0.01264,-0.0447,0.07576,0.68049
|
||||||
|
eohi_mean,cal_true,1063,-0.01786,-0.07676,0.04265,0.56079,-0.01207,-0.07091,0.05063,0.69435
|
||||||
|
eohi_val,cal_false,1063,-0.01276,-0.07529,0.04177,0.6777,-0.0115,-0.0741,0.04742,0.7081
|
||||||
|
eohi_pers,cal_35,1063,0.01428,-0.04037,0.07111,0.6418,0.01092,-0.04486,0.06885,0.7222
|
||||||
|
eohiDGEN_pers,cal_global,1063,-0.00723,-0.06312,0.05198,0.81387,-0.01039,-0.06483,0.05266,0.73501
|
||||||
|
eohi_mean,cal_35,1063,0.02233,-0.03257,0.08046,0.46697,-0.01029,-0.06915,0.04512,0.73748
|
||||||
|
eohi_pref,cal_global,1063,0.0183,-0.03841,0.07643,0.55118,0.00972,-0.0475,0.07007,0.75165
|
||||||
|
eohi_pers,cal_global,1063,-0.00467,-0.06118,0.05112,0.87911,-0.00916,-0.06635,0.05143,0.76546
|
||||||
|
eohiDGEN_pref,cal_selfActual,1063,0.02085,-0.03875,0.07863,0.49712,0.00839,-0.04939,0.06846,0.78474
|
||||||
|
eohi_val,cal_true,1063,-0.0358,-0.0949,0.02654,0.24352,-0.0081,-0.07342,0.05105,0.79199
|
||||||
|
eohi_pref,cal_15,1063,0.02312,-0.04369,0.0815,0.45148,0.00755,-0.05412,0.06986,0.80582
|
||||||
|
eohi_pref,cal_false,1063,0.02725,-0.03088,0.08754,0.37477,0.00736,-0.05157,0.06691,0.8106
|
||||||
|
eohiDGEN_pref,cal_false,1063,0.00873,-0.04608,0.05948,0.77614,0.00453,-0.05188,0.05691,0.88276
|
||||||
|
eohi_pref,cal_35,1063,0.02663,-0.03409,0.08569,0.38574,0.00424,-0.05505,0.06505,0.89016
|
||||||
|
eohiDGEN_life,cal_global,1063,0.02651,-0.03474,0.08515,0.38781,0.00414,-0.06008,0.06383,0.89275
|
||||||
|
eohiDGEN_val,cal_35,1063,0.0047,-0.04819,0.06294,0.87827,-0.00387,-0.06182,0.05569,0.89972
|
||||||
|
eohi_pers,cal_75,1063,-0.01501,-0.08009,0.05265,0.62493,-0.00292,-0.06324,0.06169,0.92424
|
||||||
|
eohi_life,cal_global,1063,0.01135,-0.04298,0.06703,0.71158,-0.00291,-0.0638,0.0565,0.92459
|
||||||
|
eohiDGEN_life,cal_35,1063,0.01256,-0.04397,0.071,0.68263,-0.00226,-0.06355,0.06168,0.94137
|
||||||
|
eohi_life,cal_55,1063,0.0154,-0.0459,0.07265,0.61609,0.00223,-0.05962,0.05937,0.94216
|
||||||
|
eohiDGEN_mean,cal_35,1063,-0.01765,-0.07148,0.0442,0.5654,-0.0016,-0.05665,0.05409,0.95856
|
||||||
|
eohiDGEN_val,cal_75,1063,0.02365,-0.04062,0.08878,0.44107,-0.00147,-0.06785,0.06103,0.96186
|
||||||
|
eohi_pref,cal_true,1063,-0.00826,-0.06273,0.05423,0.78791,0.00098,-0.05588,0.06502,0.97445
|
||||||
|
BIN
eohi1/eohi_process.xlsm
Normal file
1073
eohi1/exp1.csv
Normal file
33
eohi1/exp1_TF_descriptives.csv
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
variable,accuracy,TF,n_total,n_correct,prop_correct,match_difficulty,Table A1,Table A2,match_difficulty,,,,,,,
|
||||||
|
gest_75_T,75,T,1063,952,0.89558,NO,0.75,0.9,M,,,,,,,
|
||||||
|
dors_55_T,55,T,1063,949,0.89276,NO,0.64,0.96,M,,,,,,,
|
||||||
|
chur_75_T,75,T,1063,947,0.89087,NO,0.83,0.9,M,,,,,,,
|
||||||
|
mons_55_T,55,T,1063,944,0.88805,NO,0.55,0.82,H,,,,,,,
|
||||||
|
lock_35_T,35,T,1063,923,0.8683,NO,0.45,0.9,M,,,,,,,
|
||||||
|
hume_35_F,35,F,1063,873,0.82126,NO,0.43,0.94,M,,,,,,,
|
||||||
|
papy_75_T,75,T,1063,873,0.82126,YES,0.78,0.92,M,,,,,,,
|
||||||
|
sham_55_T,55,T,1063,860,0.80903,NO,0.57,1,M,,,,,,,0.665565
|
||||||
|
list_75_F,75,F,1063,855,0.80433,YES,0.85,0.96,L,,,,,,,
|
||||||
|
cons_55_T,55,T,1063,816,0.76764,NO,0.57,0.9,M,,,,,,,
|
||||||
|
tsun_75_T,75,T,1063,813,0.76482,YES,0.8,0.96,L,,,,,,,
|
||||||
|
pana_35_T,35,T,1063,805,0.75729,NO,0.37,0.65,H,,,,,,,
|
||||||
|
kabu_15_T,15,T,1063,791,0.74412,NO,0.25,0.76,M,,,,,,,
|
||||||
|
gulf_15_F,15,F,1063,773,0.72719,NO,0.16,0.24,H,,,,,,,
|
||||||
|
oedi_35_T,35,T,1063,767,0.72154,NO,0.43,0.88,M,,,,,,,
|
||||||
|
vaud_15_T,15,T,1063,766,0.7206,NO,0.24,0.41,H,,,,,,,
|
||||||
|
mont_35_F,35,F,1063,748,0.70367,NO,0.36,0.78,M,,,,,,,
|
||||||
|
demo_15_F,15,F,1063,727,0.68391,NO,0.19,0.39,H,,,,,,,
|
||||||
|
spee_75_F,75,F,1063,688,0.64722,NO,0.78,0.98,L,,,,,,,
|
||||||
|
dwar_55_F,55,F,1063,663,0.62371,YES,0.64,0.76,L,,,,,,,
|
||||||
|
carb_35_T,35,T,1063,629,0.59172,NO,0.39,0.31,H,,,,,,,
|
||||||
|
bohr_15_T,15,T,1063,626,0.5889,NO,0.16,0.37,H,,,,,,,
|
||||||
|
gang_15_F,15,F,1063,572,0.5381,NO,0.21,0.39,H,,,,,,,
|
||||||
|
vitc_55_F,55,F,1063,556,0.52305,NO,0.63,0.84,L,,,,,,,
|
||||||
|
hert_35_F,35,F,1063,551,0.51834,NO,0.41,0.65,M,,,,,,,
|
||||||
|
pucc_15_F,15,F,1063,543,0.51082,NO,0.15,0.22,H,,,,,,,
|
||||||
|
troy_15_T,15,T,1063,504,0.47413,NO,0.22,0.49,M,,,,,,,
|
||||||
|
moza_55_F,55,F,1063,500,0.47037,NO,0.59,0.71,L,,,,,,,
|
||||||
|
croc_75_F,75,F,1063,433,0.40734,NO,0.76,0.88,L,,,,,,,
|
||||||
|
gees_55_F,55,F,1063,324,0.3048,NO,0.57,0.84,L,,,,,,,
|
||||||
|
lute_35_F,35,F,1063,322,0.30292,NO,0.46,0.67,L,,,,,,,
|
||||||
|
memo_75_F,75,F,1063,265,0.24929,NO,0.83,0.88,L,,,,,,,
|
||||||
|
BIN
eohi1/interaction_DGEN_assumptions.png
Normal file
|
After Width: | Height: | Size: 19 KiB |
BIN
eohi1/interaction_domain_assumptions.png
Normal file
|
After Width: | Height: | Size: 20 KiB |
BIN
eohi1/linearity_plots.pdf
Normal file
875
eohi1/mixed anova - DGEN.r
Normal file
@ -0,0 +1,875 @@
|
|||||||
|
# Mixed ANOVA Analysis for DGEN Variables
|
||||||
|
# EOHI Experiment Data Analysis - DGEN Level Analysis
|
||||||
|
# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN
|
||||||
|
# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(tidyverse)
|
||||||
|
library(ez)
|
||||||
|
library(car)
|
||||||
|
library(afex) # For aov_ez (cleaner ANOVA output)
|
||||||
|
library(nortest) # For normality tests
|
||||||
|
library(emmeans) # For post-hoc comparisons
|
||||||
|
library(purrr) # For map functions
|
||||||
|
library(effsize) # For Cohen's d calculations
|
||||||
|
library(effectsize) # For effect size calculations
|
||||||
|
|
||||||
|
# Global options to remove scientific notation
|
||||||
|
options(scipen = 999)
|
||||||
|
|
||||||
|
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
|
||||||
|
options(contrasts = c("contr.sum", "contr.poly"))
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
# Read the data
|
||||||
|
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Verify the specific variables we need
|
||||||
|
required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
|
||||||
|
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
|
||||||
|
|
||||||
|
missing_vars <- required_vars[!required_vars %in% colnames(data)]
|
||||||
|
if (length(missing_vars) > 0) {
|
||||||
|
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Define domain mapping
|
||||||
|
domain_mapping <- data.frame(
|
||||||
|
variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
|
||||||
|
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"),
|
||||||
|
time = c(rep("Past", 4), rep("Future", 4)),
|
||||||
|
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Efficient data pivoting using pivot_longer
|
||||||
|
long_data <- data %>%
|
||||||
|
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||||
|
pivot_longer(
|
||||||
|
cols = all_of(required_vars),
|
||||||
|
names_to = "variable",
|
||||||
|
values_to = "DGEN_SCORE"
|
||||||
|
) %>%
|
||||||
|
left_join(domain_mapping, by = "variable") %>%
|
||||||
|
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||||
|
mutate(
|
||||||
|
TIME = factor(time, levels = c("Past", "Future")),
|
||||||
|
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||||
|
pID = as.factor(pID),
|
||||||
|
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||||
|
) %>%
|
||||||
|
# Select final columns and remove any rows with missing values
|
||||||
|
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>%
|
||||||
|
filter(!is.na(DGEN_SCORE))
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DESCRIPTIVE STATISTICS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Overall descriptive statistics by TIME and DOMAIN
|
||||||
|
desc_stats <- long_data %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n = n(),
|
||||||
|
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
|
||||||
|
q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
|
||||||
|
min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Descriptive statistics by TIME and DOMAIN:")
|
||||||
|
print(desc_stats)
|
||||||
|
|
||||||
|
# Descriptive statistics by between-subjects factors
|
||||||
|
desc_stats_by_temporal <- long_data %>%
|
||||||
|
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n = n(),
|
||||||
|
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
|
||||||
|
print(desc_stats_by_temporal)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ASSUMPTION TESTING
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Remove missing values for assumption testing
|
||||||
|
long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
|
||||||
|
|
||||||
|
# 1. Missing values check
|
||||||
|
missing_summary <- long_data %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n_total = n(),
|
||||||
|
n_missing = sum(is.na(DGEN_SCORE)),
|
||||||
|
pct_missing = round(100 * n_missing / n_total, 2),
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Missing values by TIME and DOMAIN:")
|
||||||
|
print(missing_summary)
|
||||||
|
|
||||||
|
# 2. Outlier detection
|
||||||
|
outlier_summary <- long_data_clean %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n = n(),
|
||||||
|
mean = mean(DGEN_SCORE),
|
||||||
|
sd = sd(DGEN_SCORE),
|
||||||
|
q1 = quantile(DGEN_SCORE, 0.25),
|
||||||
|
q3 = quantile(DGEN_SCORE, 0.75),
|
||||||
|
iqr = q3 - q1,
|
||||||
|
lower_bound = q1 - 1.5 * iqr,
|
||||||
|
upper_bound = q3 + 1.5 * iqr,
|
||||||
|
n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Outlier summary (IQR method):")
|
||||||
|
print(outlier_summary)
|
||||||
|
|
||||||
|
# 3. Anderson-Darling normality test (streamlined)
|
||||||
|
normality_results <- long_data_clean %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n = n(),
|
||||||
|
ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
|
||||||
|
ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Anderson-Darling normality test results:")
|
||||||
|
# Round only the numeric columns
|
||||||
|
normality_results_rounded <- normality_results %>%
|
||||||
|
mutate(across(where(is.numeric), ~ round(.x, 5)))
|
||||||
|
print(normality_results_rounded)
|
||||||
|
|
||||||
|
# 4. Homogeneity of variance (Levene's test)
|
||||||
|
# Test homogeneity across TIME within each DOMAIN
|
||||||
|
homogeneity_time <- long_data_clean %>%
|
||||||
|
group_by(DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
|
||||||
|
levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Homogeneity of variance across TIME within each DOMAIN:")
|
||||||
|
print(homogeneity_time)
|
||||||
|
|
||||||
|
# Test homogeneity across DOMAIN within each TIME
|
||||||
|
homogeneity_domain <- long_data_clean %>%
|
||||||
|
group_by(TIME) %>%
|
||||||
|
summarise(
|
||||||
|
levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
|
||||||
|
levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Homogeneity of variance across DOMAIN within each TIME:")
|
||||||
|
print(homogeneity_domain)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Function to calculate Hartley's F-max ratio
|
||||||
|
calculate_hartley_ratio <- function(variances) {
|
||||||
|
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||||
|
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||||
|
|
||||||
|
print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
|
||||||
|
|
||||||
|
observed_temporal_ratios <- long_data_clean %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||||
|
past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||||
|
fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||||
|
# Calculate F-max ratio
|
||||||
|
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||||
|
.groups = 'drop'
|
||||||
|
) %>%
|
||||||
|
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||||
|
|
||||||
|
print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# More efficient bootstrap function for Hartley's F-max test
|
||||||
|
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
|
||||||
|
# Get unique groups and their sample sizes
|
||||||
|
groups <- unique(data[[group_var]])
|
||||||
|
|
||||||
|
# Calculate observed variances for each group
|
||||||
|
observed_vars <- data %>%
|
||||||
|
dplyr::group_by(!!rlang::sym(group_var)) %>%
|
||||||
|
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
|
||||||
|
dplyr::pull(var)
|
||||||
|
|
||||||
|
# Handle invalid variances
|
||||||
|
if(any(observed_vars <= 0 | is.na(observed_vars))) {
|
||||||
|
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate observed F-max ratio
|
||||||
|
observed_ratio <- max(observed_vars) / min(observed_vars)
|
||||||
|
|
||||||
|
# Pre-allocate storage for bootstrap ratios
|
||||||
|
bootstrap_ratios <- numeric(n_iter)
|
||||||
|
|
||||||
|
# Get group data once
|
||||||
|
group_data_list <- map(groups, ~ {
|
||||||
|
group_data <- data[data[[group_var]] == .x, response_var]
|
||||||
|
group_data[!is.na(group_data)]
|
||||||
|
})
|
||||||
|
|
||||||
|
# Bootstrap with pre-allocated storage
|
||||||
|
for(i in 1:n_iter) {
|
||||||
|
# Bootstrap sample from each group independently
|
||||||
|
sample_vars <- map_dbl(group_data_list, ~ {
|
||||||
|
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
|
||||||
|
var(bootstrap_sample, na.rm = TRUE)
|
||||||
|
})
|
||||||
|
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Remove invalid ratios
|
||||||
|
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
|
||||||
|
|
||||||
|
if(length(valid_ratios) == 0) {
|
||||||
|
stop("No valid bootstrap ratios generated")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate critical value (95th percentile)
|
||||||
|
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
|
||||||
|
|
||||||
|
# Return only essential information
|
||||||
|
return(list(
|
||||||
|
observed_ratio = observed_ratio,
|
||||||
|
critical_95 = critical_95,
|
||||||
|
n_valid_iterations = length(valid_ratios)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||||
|
print("=== HARTLEY'S F-MAX TEST RESULTS ===")
|
||||||
|
set.seed(123) # For reproducibility
|
||||||
|
|
||||||
|
hartley_temporal_results <- long_data_clean %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
|
||||||
|
.groups = 'drop'
|
||||||
|
) %>%
|
||||||
|
mutate(
|
||||||
|
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||||
|
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||||
|
significant = observed_ratio > critical_95
|
||||||
|
) %>%
|
||||||
|
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||||
|
|
||||||
|
print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MIXED ANOVA ANALYSIS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Check if design is balanced
|
||||||
|
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
|
||||||
|
if(all(design_balance %in% c(0, 1))) {
|
||||||
|
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
|
||||||
|
} else {
|
||||||
|
print("Warning: Design is unbalanced")
|
||||||
|
print(summary(as.vector(design_balance)))
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
|
||||||
|
|
||||||
|
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
|
||||||
|
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
|
||||||
|
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
|
||||||
|
mixed_anova_model <- ezANOVA(data = long_data_clean,
|
||||||
|
dv = DGEN_SCORE,
|
||||||
|
wid = pID,
|
||||||
|
between = TEMPORAL_DO,
|
||||||
|
within = .(TIME, DOMAIN),
|
||||||
|
type = 3,
|
||||||
|
detailed = TRUE)
|
||||||
|
|
||||||
|
anova_output <- mixed_anova_model$ANOVA
|
||||||
|
rownames(anova_output) <- NULL # Reset row numbers to be sequential
|
||||||
|
print(anova_output)
|
||||||
|
|
||||||
|
# Show Mauchly's test for sphericity
|
||||||
|
print("Mauchly's Test of Sphericity:")
|
||||||
|
print(mixed_anova_model$Mauchly)
|
||||||
|
|
||||||
|
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
|
||||||
|
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
|
||||||
|
print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
|
||||||
|
print(mixed_anova_model$`Sphericity Corrections`)
|
||||||
|
|
||||||
|
# Extract and display corrected degrees of freedom
|
||||||
|
cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
|
||||||
|
|
||||||
|
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
|
||||||
|
anova_table <- mixed_anova_model$ANOVA
|
||||||
|
|
||||||
|
corrected_df <- data.frame(
|
||||||
|
Effect = sphericity_corr$Effect,
|
||||||
|
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||||
|
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||||
|
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||||
|
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||||
|
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||||
|
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||||
|
GG_epsilon = sphericity_corr$GGe,
|
||||||
|
HF_epsilon = sphericity_corr$HFe
|
||||||
|
)
|
||||||
|
|
||||||
|
print(corrected_df)
|
||||||
|
|
||||||
|
cat("\n=== CORRECTED F-TESTS ===\n")
|
||||||
|
for(i in seq_len(nrow(corrected_df))) {
|
||||||
|
effect <- corrected_df$Effect[i]
|
||||||
|
f_value <- anova_table$F[match(effect, anova_table$Effect)]
|
||||||
|
|
||||||
|
cat(sprintf("\n%s:\n", effect))
|
||||||
|
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
|
||||||
|
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
|
||||||
|
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||||
|
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
|
||||||
|
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||||
|
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# EFFECT SIZES (GENERALIZED ETA SQUARED)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
|
||||||
|
|
||||||
|
# Extract generalized eta squared from ezANOVA (already calculated)
|
||||||
|
effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
|
||||||
|
effect_sizes$ges <- round(effect_sizes$ges, 5)
|
||||||
|
print("Generalized Eta Squared:")
|
||||||
|
print(effect_sizes)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# POST-HOC COMPARISONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Post-hoc comparisons using emmeans
|
||||||
|
print("\n=== POST-HOC COMPARISONS ===")
|
||||||
|
|
||||||
|
# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
|
||||||
|
aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
|
||||||
|
data = long_data_clean)
|
||||||
|
|
||||||
|
# Main effect of TIME
|
||||||
|
print("Main Effect of TIME:")
|
||||||
|
time_emmeans <- emmeans(aov_model, ~ TIME)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(time_emmeans)
|
||||||
|
print("\nPairwise Contrasts:")
|
||||||
|
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
|
||||||
|
print(time_contrasts)
|
||||||
|
|
||||||
|
# Main effect of DOMAIN
|
||||||
|
print("\nMain Effect of DOMAIN:")
|
||||||
|
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(domain_emmeans)
|
||||||
|
print("\nPairwise Contrasts:")
|
||||||
|
domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
|
||||||
|
print(domain_contrasts)
|
||||||
|
|
||||||
|
# Main effect of TEMPORAL_DO
|
||||||
|
print("\nMain Effect of TEMPORAL_DO:")
|
||||||
|
temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
|
||||||
|
temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
|
||||||
|
print(temporal_contrasts)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# INTERACTION EXPLORATIONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# TEMPORAL_DO × TIME Interaction
|
||||||
|
print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
|
||||||
|
temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(temporal_time_emmeans)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TIME within each TEMPORAL_DO:")
|
||||||
|
temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
|
||||||
|
print(temporal_time_simple)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TEMPORAL_DO within each TIME:")
|
||||||
|
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
|
||||||
|
print(temporal_time_simple2)
|
||||||
|
|
||||||
|
# TIME × DOMAIN Interaction
|
||||||
|
print("\n=== TIME × DOMAIN INTERACTION ===")
|
||||||
|
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(time_domain_emmeans)
|
||||||
|
|
||||||
|
print("\nSimple Effects of DOMAIN within each TIME:")
|
||||||
|
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
|
||||||
|
print(time_domain_simple)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TIME within each DOMAIN:")
|
||||||
|
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
|
||||||
|
print(time_domain_simple2)
|
||||||
|
|
||||||
|
# TEMPORAL_DO × DOMAIN Interaction
|
||||||
|
print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
|
||||||
|
temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(temporal_domain_emmeans)
|
||||||
|
|
||||||
|
print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
|
||||||
|
temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
|
||||||
|
print(temporal_domain_simple)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
|
||||||
|
temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
|
||||||
|
print(temporal_domain_simple2)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# THREE-WAY INTERACTION ANALYSIS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
|
||||||
|
three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(three_way_emmeans)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:")
|
||||||
|
three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
|
||||||
|
print(three_way_contrasts)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Cohen's d calculations (library already loaded)
|
||||||
|
|
||||||
|
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
|
||||||
|
|
||||||
|
# Function to calculate Cohen's d for pairwise comparisons
|
||||||
|
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
|
||||||
|
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
|
||||||
|
|
||||||
|
if(nrow(significant_pairs) > 0) {
|
||||||
|
cat("Significant pairwise comparisons (p < 0.05):\n")
|
||||||
|
print(significant_pairs)
|
||||||
|
|
||||||
|
cat("\nCohen's d calculated from raw data:\n")
|
||||||
|
|
||||||
|
for(i in seq_len(nrow(significant_pairs))) {
|
||||||
|
comparison <- significant_pairs[i, ]
|
||||||
|
contrast_name <- as.character(comparison$contrast)
|
||||||
|
|
||||||
|
# Parse the contrast
|
||||||
|
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
|
||||||
|
if(length(contrast_parts) == 2) {
|
||||||
|
level1 <- trimws(contrast_parts[1])
|
||||||
|
level2 <- trimws(contrast_parts[2])
|
||||||
|
|
||||||
|
# Get raw data for both conditions
|
||||||
|
if(group2_var %in% colnames(comparison)) {
|
||||||
|
group2_level <- as.character(comparison[[group2_var]])
|
||||||
|
data1 <- data[[response_var]][
|
||||||
|
data[[group1_var]] == level1 &
|
||||||
|
data[[group2_var]] == group2_level]
|
||||||
|
|
||||||
|
data2 <- data[[response_var]][
|
||||||
|
data[[group1_var]] == level2 &
|
||||||
|
data[[group2_var]] == group2_level]
|
||||||
|
} else {
|
||||||
|
data1 <- data[[response_var]][data[[group1_var]] == level1]
|
||||||
|
data2 <- data[[response_var]][data[[group1_var]] == level2]
|
||||||
|
}
|
||||||
|
|
||||||
|
if(length(data1) > 0 && length(data2) > 0) {
|
||||||
|
# Calculate Cohen's d using effsize package
|
||||||
|
cohens_d_result <- cohen.d(data1, data2)
|
||||||
|
|
||||||
|
cat(sprintf("Comparison: %s", contrast_name))
|
||||||
|
if(group2_var %in% colnames(comparison)) {
|
||||||
|
cat(sprintf(" | %s", group2_level))
|
||||||
|
}
|
||||||
|
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
|
||||||
|
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
|
||||||
|
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
|
||||||
|
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
|
||||||
|
cat("\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cat("No significant pairwise comparisons found.\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# 1. TEMPORAL_DO × TIME INTERACTION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
|
||||||
|
|
||||||
|
# Get simple effects of TIME within each TEMPORAL_DO
|
||||||
|
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
|
||||||
|
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# Get simple effects of TEMPORAL_DO within each TIME
|
||||||
|
temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
|
||||||
|
calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# 2. TIME × DOMAIN INTERACTION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
|
||||||
|
|
||||||
|
# Get simple effects of TIME within each DOMAIN
|
||||||
|
time_domain_simple2_df <- as.data.frame(time_domain_simple2)
|
||||||
|
calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# Get simple effects of DOMAIN within each TIME
|
||||||
|
time_domain_simple_df <- as.data.frame(time_domain_simple)
|
||||||
|
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# 3. TEMPORAL_DO × DOMAIN INTERACTION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
|
||||||
|
|
||||||
|
# Get simple effects of TEMPORAL_DO within each DOMAIN
|
||||||
|
temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
|
||||||
|
calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# Get simple effects of DOMAIN within each TEMPORAL_DO
|
||||||
|
temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
|
||||||
|
calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# =============================================================================
# 4. THREE-WAY INTERACTION COHEN'S D
# =============================================================================

# FIX: use cat() so the leading "\n" is an actual newline; print() would show
# the escape sequence literally (and wrap the string in quotes).
cat("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===\n")

# Get pairwise comparisons for the three-way interaction
# (three_way_contrasts is the emmeans pairs() result computed earlier)
three_way_contrasts_df <- as.data.frame(three_way_contrasts)
print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
print(three_way_contrasts_df)

# Calculate Cohen's d for significant three-way interaction effects
cat("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:\n")

# Extract significant comparisons (p < 0.05)
# FIX: guard against NA p-values, which would otherwise inject all-NA rows
# into the bracket subset and break the loop below.
significant_three_way <- three_way_contrasts_df[
  !is.na(three_way_contrasts_df$p.value) & three_way_contrasts_df$p.value < 0.05, ]

if(nrow(significant_three_way) > 0) {
  for(i in seq_len(nrow(significant_three_way))) {
    comparison <- significant_three_way[i, ]

    # Extract the grouping variables identifying this cell
    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
    domain_level <- as.character(comparison$DOMAIN)

    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
    past_data <- long_data_clean$DGEN_SCORE[
      long_data_clean$TEMPORAL_DO == temporal_do_level &
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Past"
    ]

    future_data <- long_data_clean$DGEN_SCORE[
      long_data_clean$TEMPORAL_DO == temporal_do_level &
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Future"
    ]

    if(length(past_data) > 0 && length(future_data) > 0) {
      # Calculate Cohen's d using effsize package
      # NOTE(review): treats Past/Future as independent samples even though
      # TIME is within-subjects -- confirm this is the intended d variant.
      cohens_d_result <- cohen.d(past_data, future_data)

      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
      cat(sprintf(" Past vs Future comparison\n"))
      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
      cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
      cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
      cat(sprintf(" p-value: %.5f\n", comparison$p.value))
      cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
      cat("\n")
    }
  }
} else {
  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
}
|
||||||
|
|
||||||
|
# =============================================================================
# INTERACTION PLOTS
# =============================================================================

print("=== INTERACTION PLOTS ===")

# Define color palette for DOMAIN (4 levels)
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
                   "Values" = "#FFB000", "Life" = "#FE6100")

# TEMPORAL_DO × DOMAIN INTERACTION PLOT
# Create estimated marginal means for TEMPORAL_DO × DOMAIN
# (aov_model is the repeated-measures aov fitted earlier in this file)
emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)

# Prepare emmeans data frame: drop rows with incomplete CIs, rename the
# emmeans columns to plotting names, and pin factor level order so axis and
# legend ordering is deterministic.
emmeans_temporal_domain <- emm_temporal_domain %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
  )

# Prepare raw data for plotting (one DGEN_SCORE row per participant × cell)
iPlot_temporal_domain <- long_data_clean %>%
  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
  )

# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
# Convert to numeric x-axis and add position offsets for dodging.
# Manual dodge: the 4 DOMAIN levels are spread symmetrically around each
# TEMPORAL_DO position via offsets (k - 2.5) * dodge_width/4 for k = 1..4.
dodge_width <- 0.6

iPlot_temporal_domain <- iPlot_temporal_domain %>%
  mutate(
    x_pos = as.numeric(TEMPORAL_DO),
    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
    x_dodged = x_pos + domain_offset
  )

# Apply the identical dodge to the emmeans layer so points and error bars
# line up with their violins.
emmeans_temporal_domain <- emmeans_temporal_domain %>%
  mutate(
    x_pos = as.numeric(TEMPORAL_DO),
    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
    x_dodged = x_pos + domain_offset
  )

interaction_plot_temporal_domain <- ggplot() +
  # Distribution layer - violins (completely separated)
  geom_violin(
    data = iPlot_temporal_domain,
    aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)),
    alpha = 0.4,
    color = NA,
    trim = FALSE,
    scale = "width",
    width = dodge_width / 4
  ) +
  # Emmeans error bars (model CIs, not raw-data SEs)
  geom_errorbar(
    data = emmeans_temporal_domain,
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
    width = 0.08,
    linewidth = 0.8,
    color = "black"
  ) +
  # Emmeans points (fillable shapes 21-24; black outline via color)
  geom_point(
    data = emmeans_temporal_domain,
    aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
    size = 4,
    stroke = 1,
    color = "black"
  ) +
  labs(
    x = "Order",
    y = "Mean absolute difference from present",
    title = "TEMPORAL_DO × DOMAIN Interaction"
  ) +
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.4, 2.6)
  ) +
  scale_y_continuous(
    limits = c(0, 10),
    breaks = seq(0, 10, 2)
  ) +
  scale_color_manual(name = "Domain", values = domain_colors) +
  scale_fill_manual(name = "Domain", values = domain_colors) +
  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_temporal_domain)
|
||||||
|
|
||||||
|
# =============================================================================
# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION
# =============================================================================

# Create fresh emmeans data for emmeans-only plot (same grid as the violin
# plot above, rebuilt so this section does not depend on its data prep;
# dodge_width is still reused from the previous section)
emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)

emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    # Manual dodge: spread the 4 domains symmetrically around each x position
    x_pos = as.numeric(TEMPORAL_DO),
    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
    x_dodged = x_pos + domain_offset
  )

# Create emmeans-only plot (model means + CIs, no raw-data layer)
interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) +
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ) +
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
    size = 5,
    stroke = 1.2,
    color = "black"
  ) +
  labs(
    x = "Order",
    y = "Absolute difference from the present",
    title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)"
  ) +
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.4, 2.6)
  ) +
  # Zoomed y-range; breaks outside the limits are dropped by ggplot
  scale_y_continuous(
    limits = c(3, 6),
    breaks = seq(0, 10, 1)
  ) +
  scale_color_manual(name = "Domain", values = domain_colors) +
  scale_fill_manual(name = "Domain", values = domain_colors) +
  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_emmeans_only)
|
||||||
|
|
||||||
|
# =============================================================================
# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only)
# =============================================================================

# Prepare emmeans data frame for TIME main effect
# (time_emmeans is the emmeans(aov_model, ~ TIME) object computed earlier)
time_main_emm_df <- time_emmeans %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TIME = factor(TIME, levels = c("Past", "Future"))
  )

# Define color palette for TIME
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")

# Create TIME main-effect plot (style aligned with existing emmeans-only plot)
time_main_plot <- ggplot(time_main_emm_df) +
  geom_errorbar(
    aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.15,
    linewidth = 1,
    alpha = 0.8
  ) +
  # Fillable shapes (21, 22) with black outline via color
  geom_point(
    aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ) +
  labs(
    x = "Time",
    y = "Absolute difference from the present",
    title = "Main Effect of TIME (Estimated Marginal Means)"
  ) +
  scale_color_manual(name = "Temporal Direction", values = time_colors) +
  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(time_main_plot)
|
||||||
769
eohi1/mixed anova - domain means.r
Normal file
@ -0,0 +1,769 @@
|
|||||||
|
# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life

library(tidyverse)
library(ez)         # ezANOVA for the mixed ANOVA
library(car)        # leveneTest
library(nortest)    # For normality tests (ad.test)
library(emmeans)    # For post-hoc comparisons
library(purrr)      # For map functions (also attached by tidyverse)
library(effsize)    # For Cohen's d calculations (cohen.d)
library(effectsize) # For effect size calculations
library(ggplot2)    # For plotting (also attached by tidyverse)

# Print numbers in fixed notation rather than scientific
options(scipen = 999)

# Sum-to-zero contrasts so the Type III sums of squares requested below are
# meaningful
options(contrasts = c("contr.sum", "contr.poly"))

# NOTE(review): machine-specific absolute path -- adjust when running elsewhere
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read the data (wide format: one row per participant)
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# The eight wide-format columns to be reshaped: 2 TIME × 4 DOMAIN cells
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Define domain mapping: lookup table translating each wide-format column name
# into its TIME (Past/Future) and DOMAIN (Preferences/Personality/Values/Life)
domain_mapping <- data.frame(
  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)

# Reshape wide -> long: one row per participant × TIME × DOMAIN cell
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(domain_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Descriptive statistics
# ---------------------------------------------------------------------------

# Overall descriptive statistics by TIME and DOMAIN. Summaries are computed
# raw first, then every numeric summary (except n) is rounded to 5 decimals
# in a single across() pass.
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE, na.rm = TRUE),
    variance = var(MEAN_DIFFERENCE, na.rm = TRUE),
    sd = sd(MEAN_DIFFERENCE, na.rm = TRUE),
    median = median(MEAN_DIFFERENCE, na.rm = TRUE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE),
    min = min(MEAN_DIFFERENCE, na.rm = TRUE),
    max = max(MEAN_DIFFERENCE, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(across(c(mean, variance, sd, median, q1, q3, min, max), ~ round(.x, 5)))

print(desc_stats)

# Descriptive statistics additionally split by the between-subjects factor
# TEMPORAL_DO
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE, na.rm = TRUE),
    variance = var(MEAN_DIFFERENCE, na.rm = TRUE),
    sd = sd(MEAN_DIFFERENCE, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(across(c(mean, variance, sd), ~ round(.x, 5)))

print(desc_stats_by_temporal)
|
||||||
|
|
||||||
|
# ASSUMPTION TESTING

# 1. Missing values check: per-cell NA counts for the response.
# long_data was already filtered with !is.na(MEAN_DIFFERENCE), so every
# n_missing is expected to be 0 -- this is a sanity check.
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    .groups = "drop"
  ) %>%
  mutate(pct_missing = round(100 * n_missing / n_total, 2))

print(missing_summary)

# Create clean dataset (long_data is already filtered for NA values)
long_data_clean <- long_data
|
||||||
|
|
||||||
|
# 2. Outlier detection via Tukey's 1.5 × IQR fences, per TIME × DOMAIN cell.
# Later summarise() expressions deliberately reference earlier ones
# (q1/q3 -> iqr -> bounds -> n_outliers), which dplyr::summarise supports.
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    median = median(MEAN_DIFFERENCE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print(outlier_summary)
|
||||||
|
|
||||||
|
# 3. Anderson-Darling normality test per TIME × DOMAIN cell.
# FIX: run ad.test() once per cell and unpack both values from the single
# htest result -- the original called it twice per cell (once for the
# statistic, once for the p-value), doubling the work.
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad_test = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad_test, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad_test, ~ .x$p.value)
  ) %>%
  select(-ad_test)

print(normality_results)
|
||||||
|
|
||||||
|
# 4. Homogeneity of variance (Levene's test)
# FIX: run leveneTest() once per group and unpack the F statistic and p-value
# from the single result -- the original called it twice per group.

# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-levene)

print(homogeneity_time)

# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ DOMAIN)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-levene)

print(homogeneity_domain)
|
||||||
|
|
||||||
|
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES

# Hartley's F-max statistic: ratio of the largest to the smallest of the
# supplied group variances (NAs ignored). Values near 1 indicate homogeneity.
# NOTE(review): not referenced by the pipelines below, which compute the
# ratio inline -- presumably kept for interactive use; confirm.
calculate_hartley_ratio <- function(variances) {
  rng <- range(variances, na.rm = TRUE)
  rng[2] / rng[1]
}
|
||||||
|
|
||||||
|
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)

# Sanity check of the between-subjects grouping variable and its cell sizes
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio (larger variance over smaller)
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# More efficient bootstrap function for Hartley's F-max test
#
# Empirically derives a critical value for Hartley's F-max (max/min group
# variance ratio) by resampling each group independently with replacement.
#
# Args:
#   data: data frame holding the grouping and response columns
#   group_var: name (string) of the grouping column
#   response_var: name (string) of the numeric response column
#   n_iter: number of bootstrap iterations (default 1000)
#
# Returns a list with:
#   observed_ratio - F-max ratio of the observed group variances
#   critical_95 - 95th percentile of the bootstrap ratio distribution
#   n_valid_iterations - number of finite, non-NA bootstrap ratios retained
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get unique groups and their sample sizes
  groups <- unique(data[[group_var]])

  # Calculate observed variances for each group
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Handle invalid variances: zero/NA would break the ratio, so floor them
  # at a tiny positive value
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }

  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Get group data once (NA responses dropped) so the loop only resamples.
  # NOTE(review): single-bracket extraction `data[rows, response_var]` drops
  # to a vector for a base data.frame but stays a one-column tibble for a
  # tibble input (e.g. from pick()) -- verify callers pass the shape sample()
  # expects.
  group_data_list <- map(groups, ~ {
    group_data <- data[data[[group_var]] == .x, response_var]
    group_data[!is.na(group_data)]
  })

  # Bootstrap with pre-allocated storage
  for(i in 1:n_iter) {
    # Bootstrap sample from each group independently
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Remove invalid ratios (Inf/NaN from degenerate resamples)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Calculate critical value (95th percentile)
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||||
|
|
||||||
|
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility of the bootstrap resampling

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # One bootstrap run per cell; pick() hands only the two needed columns
    # to the helper
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Heterogeneous when the observed ratio exceeds the bootstrap 95th
    # percentile
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, DOMAIN, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# MIXED ANOVA ANALYSIS

# Number of rows with no missing values anywhere (sanity check before ezANOVA)
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)

# Balance check: in a fully crossed repeated-measures design every
# participant contributes exactly one observation per TIME × DOMAIN cell,
# so every count in this 3-way table must be 0 or 1.
design_balance <- with(long_data_clean, table(pID, TIME, DOMAIN))
is_balanced <- all(design_balance %in% c(0, 1))

if (is_balanced) {
  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
  print("Warning: Design is unbalanced")
  print(summary(as.vector(design_balance)))
}
|
||||||
|
|
||||||
|
# MIXED ANOVA WITH SPHERICITY CORRECTIONS

# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(data = long_data_clean,
                             dv = MEAN_DIFFERENCE,
                             wid = pID,
                             between = TEMPORAL_DO,
                             within = .(TIME, DOMAIN),
                             type = 3,
                             detailed = TRUE)

anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)

# Show Mauchly's test for sphericity
print(mixed_anova_model$Mauchly)

# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA

  # FIX: compute the Effect -> ANOVA-row lookup once instead of repeating the
  # same match() call for every column of the data frame below.
  effect_rows <- match(sphericity_corr$Effect, anova_table$Effect)

  # Corrected dfs are the original dfs scaled by the epsilon estimates
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[effect_rows],
    Original_DFd = anova_table$DFd[effect_rows],
    GG_DFn = anova_table$DFn[effect_rows] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[effect_rows] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[effect_rows] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[effect_rows] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]

      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }

  # Within-subjects effects (sphericity corrections where applicable)

  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }

  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }

  # Interactions with sphericity corrections: report the uncorrected, the
  # GG-corrected, and the HF-corrected F test for every effect that appears
  # in the corrections table
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # FIX: print() would display the "\n" escape literally (and quote the
  # string); cat() emits a real blank line before the note.
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
|
||||||
|
|
||||||
|
# COHEN'S D FOR MAIN EFFECTS

# Create aov model for emmeans. The univariate repeated-measures error
# structure Error(pID/(TIME * DOMAIN)) mirrors the ezANOVA design above.
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)

# Main Effect of TIME: estimated marginal means and the Past-Future contrast
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)

# FIX: adjust method spelled lowercase "bonferroni" for consistency with
# every other pairs() call in this file.
time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")

time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)

# Calculate Cohen's d for TIME main effect from the raw scores pooled across
# DOMAIN and TEMPORAL_DO
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  # NOTE(review): cohen.d() treats Past/Future as independent samples even
  # though TIME is within-subjects -- confirm this is the intended d variant.
  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
|
||||||
|
|
||||||
|
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
# All pairwise domain contrasts, Bonferroni-adjusted
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)

# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    # emmeans labels contrasts as "LevelA - LevelB"; split to recover levels
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])

      # Raw scores for the two domains (pooled across TIME and TEMPORAL_DO)
      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]

      if(length(data1) > 0 && length(data2) > 0) {
        domain_cohens_d <- cohen.d(data1, data2)
        cat(sprintf("Comparison: %s\n", contrast_name))
        cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
        cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
        cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
        cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
        cat("\n")
      }
    }
  }
}
|
||||||
|
|
||||||
|
# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS

# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO
# (Past vs Future contrast computed separately for the 01PAST and 02FUT groups)
temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni")
print(temporal_time_simple)

# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME
# (all pairwise DOMAIN contrasts, separately for Past and Future)
time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME)
time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni")
print(time_domain_simple)
|
||||||
|
|
||||||
|
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS

# Function to calculate Cohen's d for pairwise comparisons
#
# For every row of `pairs_df` with p < .05, parses the emmeans contrast label
# ("LevelA - LevelB"), pulls the matching raw scores out of `data`, and prints
# an effsize::cohen.d summary. Results are printed, not returned.
#
# Args:
#   pairs_df: data.frame of an emmeans pairs() result (needs at least the
#             `contrast` and `p.value` columns)
#   data: long-format data frame to compute d from
#   group1_var: column whose levels appear in the contrast label
#   group2_var: conditioning ("by") column; when pairs_df has no such column
#               the comparison is treated as unconditional
#   response_var: name of the numeric response column
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  # NOTE(review): NA p-values would survive this bracket subset as all-NA
  # rows -- confirm upstream p-values are never NA.
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label "LevelA - LevelB"
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions, restricted to the conditioning
        # level when the pairs table carries one
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
|
||||||
|
|
||||||
|
# 1. TEMPORAL_DO × TIME INTERACTION

# Cohen's d for the significant TIME contrasts within each TEMPORAL_DO level
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")

# 2. TIME × DOMAIN INTERACTION

# Past vs Future contrasts, computed separately for each domain
time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain)
print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===")
print("Past vs Future contrasts for each domain:")
print(time_domain_simple_df)

# Cohen's d for the Past vs Future contrast inside every domain
print("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===")
significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ]

if (nrow(significant_time_domain) > 0) {
  print("Significant Past vs Future contrasts within domains (p < 0.05):")
  print(significant_time_domain)

  print("\nCohen's d calculations for Past vs Future within each domain:")

  # Note: the loop intentionally reports every domain (not just the
  # significant ones); significance only gates whether this section runs.
  for (row_idx in seq_len(nrow(time_domain_simple_df))) {
    contrast_row <- time_domain_simple_df[row_idx, ]
    domain_level <- as.character(contrast_row$DOMAIN)

    # Raw observations for each TIME level within this domain
    past_vals <- with(long_data_clean,
                      MEAN_DIFFERENCE[DOMAIN == domain_level & TIME == "Past"])
    future_vals <- with(long_data_clean,
                        MEAN_DIFFERENCE[DOMAIN == domain_level & TIME == "Future"])

    if (length(past_vals) > 0 && length(future_vals) > 0) {
      # effsize::cohen.d gives the standardized difference and a label
      d_res <- cohen.d(past_vals, future_vals)

      cat(sprintf("Domain: %s\n", domain_level))
      cat(sprintf(" Past vs Future comparison\n"))
      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_vals), length(future_vals)))
      cat(sprintf(" Cohen's d: %.5f\n", d_res$estimate))
      cat(sprintf(" Effect size interpretation: %s\n", d_res$magnitude))
      cat(sprintf(" p-value: %.5f\n", contrast_row$p.value))
      cat(sprintf(" Estimated difference: %.5f\n", contrast_row$estimate))
      cat("\n")
    }
  }
} else {
  cat("No significant Past vs Future contrasts found within any domain.\n")
}
|
||||||
|
|
||||||
|
# INTERACTION PLOTS

# Shared color palettes (IBM colorblind-safe hex values)
time_colors <- setNames(c("#648FFF", "#DC267F"), c("Past", "Future"))
domain_colors <- setNames(c("#648FFF", "#DC267F", "#FFB000", "#FE6100"),
                          c("Preferences", "Personality", "Values", "Life"))

# Canonical TIME factor order used by all plots below
time_levels <- c("Past", "Future")


# ============================================================
# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
# ============================================================

# Fresh marginal means over the TEMPORAL_DO × TIME grid
emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)

# Tidy the emmeans grid, rename the CI columns, and compute manually-dodged
# x positions (TIME levels offset ±0.15 around each TEMPORAL_DO position).
emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL), !is.na(upper.CL), !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(TEMPORAL_DO),
    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
    x_dodged = x_pos + time_offset
  )
|
||||||
|
|
||||||
|
# Emmeans-only interaction plot: error bars + dodged points per TIME level.
# Layers are collected in a list first, then added to the base ggplot.
plot3_layers <- list(
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ),
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ),
  labs(
    x = "Order",
    y = "Mean absolute difference from present",
    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
  ),
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.5, 2.5)
  ),
  scale_color_manual(name = "Temporal Direction", values = time_colors),
  scale_fill_manual(name = "Temporal Direction", values = time_colors),
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)),
  theme_minimal(base_size = 13),
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )
)

interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + plot3_layers

print(interaction_plot_emmeans_only)
|
||||||
|
|
||||||
|
# ============================================================
# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis)
# ============================================================

# Marginal means over the TIME × DOMAIN grid
emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN)

# Shared dodge width and domain ordering for this plot
dodge_width_plot4 <- 0.2
domain_levels_plot4 <- c("Preferences", "Personality", "Values", "Life")

# Tidy emmeans grid with CI columns renamed and dodged x positions computed
emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL), !is.na(upper.CL), !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = domain_levels_plot4),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(DOMAIN),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
    x_dodged = x_pos + time_offset
  )

# Raw per-participant data with matching dodged positions.
# NOTE(review): not referenced by the emmeans-only plot below — presumably
# kept for a raw-data overlay elsewhere; confirm before removing.
iPlot_plot4 <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = domain_levels_plot4),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(DOMAIN),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
    x_dodged = x_pos + time_offset
  )
|
||||||
|
|
||||||
|
# TIME × DOMAIN interaction plot (DOMAIN on x-axis, TIME as dodged groups),
# estimated marginal means only. Layers assembled as a list, then added.
plot4_layers <- list(
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ),
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ),
  labs(
    x = "Domain",
    y = "Mean absolute difference from present",
    title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means"
  ),
  scale_x_continuous(
    breaks = c(1, 2, 3, 4),
    labels = c("Preferences", "Personality", "Values", "Life"),
    limits = c(0.5, 4.5)
  ),
  scale_color_manual(name = "Temporal Direction", values = time_colors),
  scale_fill_manual(name = "Temporal Direction", values = time_colors),
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)),
  theme_minimal(base_size = 13),
  theme(
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )
)

interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + plot4_layers

print(interaction_plot_time_domain_plot4)
|
||||||
|
|
||||||
|
# ============================================================
# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only)
# ============================================================

# Tidy the TIME main-effect marginal means (time_emmeans computed earlier)
time_main_emm_df <- time_emmeans %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL), !is.na(upper.CL), !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TIME = factor(TIME, levels = c("Past", "Future"))
  )

# Main-effect plot, styled to match the emmeans-only interaction plots
time_main_layers <- list(
  geom_errorbar(
    aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.15,
    linewidth = 1,
    alpha = 0.8
  ),
  geom_point(
    aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ),
  labs(
    x = "Time",
    y = "Mean absolute difference from present",
    title = "Main Effect of TIME (Estimated Marginal Means)"
  ),
  scale_color_manual(name = "Temporal Direction", values = time_colors),
  scale_fill_manual(name = "Temporal Direction", values = time_colors),
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)),
  theme_minimal(base_size = 13),
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )
)

time_main_plot <- ggplot(time_main_emm_df) + time_main_layers

print(time_main_plot)
|
||||||
765
eohi1/mixed anova - personality.r
Normal file
@ -0,0 +1,765 @@
|
|||||||
|
# Mixed ANOVA Analysis for Personality Items
|
||||||
|
# EOHI Experiment Data Analysis - Item Level Analysis
|
||||||
|
# Variables: NPastDiff_pers_extravert, NPastDiff_pers_critical, NPastDiff_pers_dependable, NPastDiff_pers_anxious, NPastDiff_pers_complex
|
||||||
|
# NFutDiff_pers_extravert, NFutDiff_pers_critical, NFutDiff_pers_dependable, NFutDiff_pers_anxious, NFutDiff_pers_complex
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(tidyverse)
|
||||||
|
library(ez)
|
||||||
|
library(car)
|
||||||
|
library(afex) # For aov_ez (cleaner ANOVA output)
|
||||||
|
library(nortest) # For normality tests
|
||||||
|
library(emmeans) # For post-hoc comparisons
|
||||||
|
library(purrr) # For map functions
|
||||||
|
library(effsize) # For Cohen's d calculations
|
||||||
|
library(effectsize) # For effect size calculations
|
||||||
|
|
||||||
|
# Global options: disable scientific notation, and use sum/polynomial
# contrasts (sum-to-zero coding is required for interpretable Type-III
# mixed-ANOVA results).
options(scipen = 999, contrasts = c("contr.sum", "contr.poly"))

# NOTE(review): hard-coded absolute path — the script only runs as-is on the
# original author's machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the experiment-1 data
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Basic shape / participant-count sanity checks
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))

# The five personality items, each measured as a past- and future-difference
# column; build the ten expected column names programmatically.
pers_items <- c("extravert", "critical", "dependable", "anxious", "complex")
required_vars <- c(paste0("NPastDiff_pers_", pers_items),
                   paste0("NFutDiff_pers_", pers_items))

# Verify the specific variables we need
missing_vars <- setdiff(required_vars, colnames(data))
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required personality item variables found!")
}

# Map each wide column name to its TIME level and item label
item_mapping <- data.frame(
  variable = required_vars,
  time = rep(c("Past", "Future"), each = 5),
  item = rep(pers_items, 2),
  stringsAsFactors = FALSE
)
|
||||||
|
|
||||||
|
# Item mapping created
|
||||||
|
|
||||||
|
# Reshape the ten wide item columns into long format: one row per
# pID × TIME × ITEM, with the difference score in MEAN_DIFFERENCE.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  # Attach TIME/ITEM labels from the mapping (columns 'time' and 'item')
  left_join(item_mapping, by = "variable") %>%
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Drop rows with a missing response, keep only the analysis columns
  filter(!is.na(MEAN_DIFFERENCE)) %>%
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE)

print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
|
||||||
|
|
||||||
|
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================

# Round-to-5-decimals shorthand used throughout the descriptives
r5 <- function(x) round(x, 5)

# Overall descriptive statistics per TIME × ITEM cell
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = r5(mean(MEAN_DIFFERENCE, na.rm = TRUE)),
    variance = r5(var(MEAN_DIFFERENCE, na.rm = TRUE)),
    sd = r5(sd(MEAN_DIFFERENCE, na.rm = TRUE)),
    median = r5(median(MEAN_DIFFERENCE, na.rm = TRUE)),
    q1 = r5(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE)),
    q3 = r5(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE)),
    min = r5(min(MEAN_DIFFERENCE, na.rm = TRUE)),
    max = r5(max(MEAN_DIFFERENCE, na.rm = TRUE)),
    .groups = 'drop'
  )

print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)

# Same descriptives split additionally by the between-subjects order factor
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = r5(mean(MEAN_DIFFERENCE, na.rm = TRUE)),
    variance = r5(var(MEAN_DIFFERENCE, na.rm = TRUE)),
    sd = r5(sd(MEAN_DIFFERENCE, na.rm = TRUE)),
    .groups = 'drop'
  )

print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================

# Working copy without missing responses (long_data is already filtered at
# creation, so this is a belt-and-braces pass)
long_data_clean <- long_data %>% filter(!is.na(MEAN_DIFFERENCE))
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))

# 1. Missing values check per TIME × ITEM cell
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print("Missing values by TIME and ITEM:")
print(missing_summary)

# 2. Outlier detection via the 1.5 × IQR rule, per TIME × ITEM cell
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print("Outlier summary (IQR method):")
print(outlier_summary)
|
||||||
|
|
||||||
|
# 3. Anderson-Darling normality test per TIME × ITEM cell.
# The test is run ONCE per cell and both the statistic and p-value are
# extracted from the same result object (previously ad.test was called
# twice per cell — wasteful and fragile).
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad, ~ .x$p.value)
  ) %>%
  select(-ad)

print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
|
||||||
|
|
||||||
|
# 4. Homogeneity of variance (Levene's test).
# Each test is run ONCE per group and both F and p are pulled from the same
# result (previously leveneTest was evaluated twice per group).

# Test homogeneity across TIME within each ITEM
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-levene)

print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)

# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ ITEM)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-levene)

print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Hartley's F-max statistic: ratio of the largest to the smallest variance
# in `variances` (NAs ignored). A large ratio signals heterogeneity.
calculate_hartley_ratio <- function(variances) {
  bounds <- range(variances, na.rm = TRUE)
  bounds[2] / bounds[1]
}
|
||||||
|
|
||||||
|
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================

# For mixed ANOVA: Test homogeneity across the BETWEEN-SUBJECTS factor
# (TEMPORAL_DO) within each combination of within-subjects factors (TIME × ITEM)

# First, confirm which TEMPORAL_DO levels are actually present
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))

print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Variance of each TEMPORAL_DO order group within this TIME × ITEM cell
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Use the shared helper rather than duplicating max/min inline; the
    # helper's na.rm = TRUE also keeps an NA variance from propagating
    f_max_ratio = calculate_hartley_ratio(c(past_var, fut_var)),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# Bootstrap critical values for Hartley's F-max test.
#
# Args:
#   data         data frame (or tibble) containing `group_var` and `response_var`.
#   group_var    name (string) of the grouping column.
#   response_var name (string) of the numeric response column.
#   n_iter       number of bootstrap iterations (default 1000).
#
# Returns a list with the observed F-max ratio, the bootstrap 95th-percentile
# critical value, and the number of valid bootstrap iterations.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group levels
  groups <- unique(data[[group_var]])

  # Observed per-group variances
  observed_vars <- data %>%
    dplyr::group_by(!!sym(group_var)) %>%
    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Replace zero/NA variances with a tiny positive value so the ratio is finite
  if (any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }

  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Extract each group's response as a plain numeric vector, NAs removed.
  # (The previous `data[rows, response_var]` form returns a one-column tibble
  # — not a vector — when `data` arrives via dplyr::pick(), which breaks the
  # NA filter and sample() below; `[[` extraction is always a vector.)
  group_data_list <- map(groups, ~ {
    vals <- data[[response_var]][data[[group_var]] == .x]
    vals[!is.na(vals)]
  })

  # Resample each group independently and record the ratio of extreme variances
  for (i in seq_len(n_iter)) {
    sample_vars <- map_dbl(group_data_list, ~ {
      resampled <- sample(.x, size = length(.x), replace = TRUE)
      var(resampled, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Drop non-finite ratios (e.g. a resample with zero variance)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||||
|
|
||||||
|
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Run the bootstrap on just this cell's TEMPORAL_DO / response columns
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    # Unpack the list column: observed ratio, critical value, and the flag
    observed_ratio = map_dbl(hartley_result, "observed_ratio"),
    critical_95 = map_dbl(hartley_result, "critical_95"),
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================

# Cross-tabulations of the design.
# NOTE(review): these two top-level expressions are only auto-printed in an
# interactive session, not when the file is source()d.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
xtabs(~ pID + TIME + ITEM, data = long_data_clean)

# Structure summary before fitting
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
for (fac in c("TIME", "ITEM", "TEMPORAL_DO")) {
  print(paste(sprintf("Number of %s levels:", fac), length(levels(long_data_clean[[fac]]))))
}

# Complete-case count
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))

# Balance check: cell counts across pID × TIME × ITEM
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
print(summary(as.vector(design_balance)))

# Participants missing any of the 10 expected TIME × ITEM combinations
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )

print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
|
||||||
|
|
||||||
|
# Mixed ANOVA, traditional aov() formulation.
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels:
#   extravert, critical, dependable, anxious, complex)
mixed_anova_model <- aov(
  MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
  data = long_data_clean
)

print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))

# Same model via afex::aov_ez, which gives a cleaner summary table
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(
  id = "pID",
  dv = "MEAN_DIFFERENCE",
  data = long_data_clean,
  between = "TEMPORAL_DO",
  within = c("TIME", "ITEM")
)

print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
|
||||||
|
|
||||||
|
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================

# Sphericity tests using ezANOVA (library already loaded).
# Each call is wrapped in tryCatch because ezANOVA errors out when the data
# do not match the requested within-subjects design; the error message is
# printed instead of aborting the script.

print("\n=== SPHERICITY TESTS ===")

# Test sphericity for ITEM (5 levels - within-subjects)
# NOTE(review): this call lists only ITEM as a within factor, but each id has
# multiple rows per ITEM (one per TIME level); ezANOVA may aggregate or error
# here — confirm this is the intended design for the Mauchly test.
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA; ezANOVA wants a numeric/plain id
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))

  # Run ezANOVA to get sphericity tests (type-3 sums of squares)
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)

  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)

}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})

# Test sphericity for TIME (2 levels - within-subjects).
# With only 2 levels sphericity holds trivially; Mauchly output may be empty.
# NOTE(review): relies on temp_data created inside the previous tryCatch —
# if that block failed before the assignment, this one fails too.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)

  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)

}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})

# Test sphericity for the full TIME × ITEM within-subjects design
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)

  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)

}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================

print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")

# Get corrected results from ezANOVA. The returned list carries the ANOVA
# table ($ANOVA), Mauchly's test ($Mauchly) and — when sphericity is violated —
# a `Sphericity Corrections` table with GG/HF epsilons and corrected p-values.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)

print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)

# Show epsilon values for sphericity corrections
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)

# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))

# Check if sphericity corrections are available
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")

  # Get the sphericity corrections and the uncorrected ANOVA table
  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  anova_table <- ez_corrected$ANOVA

  # Row position of each corrected effect in the ANOVA table, computed once
  # instead of repeating match() for every column below.
  idx <- match(sphericity_corr$Effect, anova_table$Effect)

  # Corrected df = original df × epsilon (Greenhouse-Geisser / Huynh-Feldt)
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[idx],
    Original_DFd = anova_table$DFd[idx],
    GG_DFn = anova_table$DFn[idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  # Also show the corrected F-values and p-values with degrees of freedom.
  # seq_len() (rather than 1:nrow) is safe if the corrections table is empty.
  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[idx[i]]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
|
||||||
|
|
||||||
|
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================

print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")

# Create a wide-format data for car package (library already loaded)

tryCatch({
  # Convert to wide format: one column per TIME × ITEM cell, named "TIME_ITEM"
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")

  # Create the repeated measures design
  # NOTE(review): this list has 8 domain-style names (4 items × 2 times),
  # but ITEM has 5 levels per the sphericity section above — so the
  # missing-columns branch below will always fire. Confirm these names
  # against the columns pivot_wider actually produces.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")

  # Check if all columns exist
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Create the repeated measures design
    rm_design <- as.matrix(wide_data[, within_vars])

    # Calculate epsilon values
    # NOTE(review): `epsilon()` is not an exported function of the car
    # package — confirm which package is expected to provide it; if it is
    # unavailable the surrounding tryCatch reports the error and skips the
    # rest of this branch (including the manual corrections below).
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")

    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))

    # Interpretation
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }

    # =============================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =============================================================================

    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")

    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")

    # ITEM main effect (DFn = 4, DFd = 4244)
    # NOTE(review): the df values 4 and 4244 are hard-coded from a previous
    # aov() run — update them if the sample size or design changes.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf

    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf

    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }

}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================

# Effect size calculations (library already loaded)

print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")

# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
  # Extract effect sizes from aov model (effectsize::eta_squared)
  # NOTE(review): round() over the whole returned table may error on its
  # non-numeric columns — that error would be swallowed by this tryCatch.
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  print(round(aov_effects, 5))
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})

# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA provides partial eta squared, convert to generalized
  ez_effects <- ez_corrected$ANOVA
  # This assignment is a deliberate no-op: the ges column is already
  # generalized eta squared as returned by ezANOVA.
  ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
  print("Generalized Eta Squared from ezANOVA:")
  print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})

# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
# NOTE(review): anova_summary is computed here but never printed or used below.
anova_summary <- summary(mixed_anova_model)

# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================

# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.

# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================

# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")

# Main effect of TIME: estimated marginal means + Bonferroni-adjusted pairs
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)

# Main effect of ITEM: estimated marginal means + Bonferroni-adjusted pairs
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)


# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================


# TIME × ITEM Interaction: cell means and simple-effect contrasts.
# The two `pairs(..., by = ...)` objects are reused by the Cohen's d
# section further down, so their names must not change.
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)

print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)

print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================


# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================

# Cohen's d calculations (library already loaded)

print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")

# Function to calculate Cohen's d for pairwise comparisons
#
# For every row of `pairs_df` (an emmeans pairwise-contrast table coerced to
# data frame) with p.value < 0.05, pulls the raw observations of the two
# contrasted levels of `group1_var` out of `data` — additionally restricted
# to the current level of `group2_var` when that column is present in the
# contrast table (i.e. the contrasts were computed with `by = group2_var`) —
# and reports effsize::cohen.d alongside the adjusted p-value. All output
# goes to the console via cat(); nothing is returned.
#
# NOTE(review): parsing assumes each contrast label is "<level1> - <level2>"
# and that the labels equal the raw factor values in `data`; emmeans can
# decorate labels for some factor types — confirm before reusing elsewhere.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label into its two factor levels
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions; condition on the `by` factor
        # when the contrast table carries it as a column
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        # Comparisons whose parsed levels match no rows are silently skipped
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): the p = 0.012 in this header is hard-coded from a previous
# run — confirm it still matches the ANOVA output above.

print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")

# Get simple effects of TIME within each ITEM
# (these contrasts were built with by = "ITEM", so ITEM is a column of the
# coerced data frame and is used as the conditioning factor)
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")

# Get simple effects of ITEM within each TIME
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
|
||||||
|
|
||||||
|
|
||||||
765
eohi1/mixed anova - preferences.r
Normal file
@ -0,0 +1,765 @@
|
|||||||
|
# Mixed ANOVA Analysis for Preference Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel

# Load required libraries
library(tidyverse)
library(ez)          # ezANOVA: sphericity tests and corrections
library(car)         # leveneTest
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations

# Global options to remove scientific notation
options(scipen = 999)

# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation
# of Type III sums of squares)
options(contrasts = c("contr.sum", "contr.poly"))

# NOTE(review): machine-specific absolute path — parameterise before sharing
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read the data
data <- read.csv("exp1.csv")

# Display basic information about the dataset
# NOTE(review): collapse = " x" prints e.g. "600 x21" (no space after the x)
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))

# Verify the specific variables we need
required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
                   "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel")

missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required preference item variables found!")
}

# Define item mapping: one row per raw column, giving its (time, item) cell
item_mapping <- data.frame(
  variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
               "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"),
  time = c(rep("Past", 5), rep("Future", 5)),
  item = rep(c("read", "music", "tv", "nap", "travel"), 2),
  stringsAsFactors = FALSE
)

# Item mapping created

# Efficient data pivoting using pivot_longer: wide columns → one row per
# participant × TIME × ITEM cell with the difference score in MEAN_DIFFERENCE
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(item_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))

print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
|
||||||
|
|
||||||
|
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================

# Overall descriptive statistics by TIME and ITEM
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)

# Descriptive statistics by between-subjects factors
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)


# =============================================================================
# ASSUMPTION TESTING
# =============================================================================

# Remove missing values for assumption testing
# (long_data was already filtered with !is.na(MEAN_DIFFERENCE) above, so this
# is a defensive no-op safeguard)
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))

# 1. Missing values check
# (counts will all be zero given the upstream NA filter; kept as a sanity check)
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print("Missing values by TIME and ITEM:")
print(missing_summary)

# 2. Outlier detection using the 1.5 × IQR fences per TIME × ITEM cell
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print("Outlier summary (IQR method):")
print(outlier_summary)

# 3. Anderson-Darling normality test (streamlined)
# NOTE(review): ad.test is evaluated twice per cell (statistic and p-value);
# harmless but could be computed once via a list-column if speed matters.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )

print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)

# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each ITEM
# NOTE(review): the formula inside summarise relies on the dplyr data mask
# being the formula environment so leveneTest can find the columns — confirm
# this evaluates correctly under the installed dplyr version.
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)

# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
|
||||||
|
|
||||||
|
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================

# Hartley's F-max ratio: largest group variance divided by the smallest.
# Values near 1 indicate homogeneous variances; NAs are ignored.
calculate_hartley_ratio <- function(variances) {
  extremes <- range(variances, na.rm = TRUE)  # c(min, max)
  extremes[2] / extremes[1]
}
|
||||||
|
|
||||||
|
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================

# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × ITEM)

# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))

print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM
    # combination.
    # NOTE(review): the level labels "01PAST"/"02FUT" are hard-coded — if the
    # check printed above shows different labels, these variances come out NA.
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio (largest variance over smallest)
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# More efficient bootstrap function for Hartley's F-max test
#
# Computes the observed F-max ratio (max group variance / min group variance)
# of `response_var` across the levels of `group_var`, then derives a 95%
# critical value by bootstrap: each group is resampled with replacement
# independently and the F-max ratio of the resampled variances recorded.
#
# Args:
#   data:         data.frame or tibble containing group_var and response_var
#   group_var:    name (string) of the grouping column
#   response_var: name (string) of the numeric response column
#   n_iter:       number of bootstrap iterations (default 1000)
#
# Returns a list with observed_ratio, critical_95 (95th percentile of the
# valid bootstrap ratios) and n_valid_iterations (finite, non-NA ratios).
#
# Fix: the previous version extracted group values with
# `data[data[[group_var]] == .x, response_var]`; on a tibble (which is what
# the call site passes via pick()) single-column `[` returns a one-column
# tibble, not a vector, so sample()/var() operated on the wrong object.
# Values are now extracted with `[[`, which yields a plain vector for both
# data.frames and tibbles. The purrr helpers were replaced with base R
# equivalents; the interface and returned structure are unchanged.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group labels present in the data
  groups <- unique(data[[group_var]])

  # NA-free response values for each group, extracted once up front
  group_values <- lapply(groups, function(g) {
    vals <- data[[response_var]][data[[group_var]] == g]
    vals[!is.na(vals)]
  })

  # Observed per-group variances
  observed_vars <- vapply(group_values, var, numeric(1))

  # Handle invalid (zero or NA) variances so the ratio stays finite
  bad <- observed_vars <= 0 | is.na(observed_vars)
  if (any(bad)) {
    observed_vars[bad] <- 1e-10
  }

  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Bootstrap distribution of the F-max ratio (pre-allocated storage)
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    # Resample each group independently and take the variance ratio
    sample_vars <- vapply(group_values, function(v) {
      var(sample(v, size = length(v), replace = TRUE), na.rm = TRUE)
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Remove invalid ratios (e.g. a zero-variance resample giving Inf/NA)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Calculate critical value (95th percentile)
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||||
|
|
||||||
|
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility of the bootstrap resampling

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # pick() hands the current group's two columns to the bootstrap helper;
    # the result list is stored as a list-column and unpacked below
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Observed ratio beyond the bootstrap 95th percentile → heterogeneous
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================

# Check for missing data patterns
# NOTE(review): when this script is source()d, bare top-level table()/xtabs()
# calls are computed but not printed — wrap in print() if output is wanted.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")

# Check data balance
xtabs(~ pID + TIME + ITEM, data = long_data_clean)

# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))

# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))

# Check if design is balanced: per-participant cell counts should all equal 1
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)

print(summary(as.vector(design_balance)))

# Check for any participants with missing combinations
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )

print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])

# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
# Error(pID/(TIME * ITEM)) declares participant as the random factor, with
# TIME, ITEM and their interaction varying within participants.
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
                         data = long_data_clean)

print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))

# Alternative: Using afex::aov_ez for cleaner output (optional)
# NOTE(review): afex applies a sphericity correction to p-values by default —
# confirm which correction the installed version reports.
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
                           dv = "MEAN_DIFFERENCE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "ITEM"))

print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
|
||||||
|
|
||||||
|
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================

# Sphericity tests using ezANOVA (library already loaded)

print("\n=== SPHERICITY TESTS ===")

# Test sphericity for ITEM (5 levels - within-subjects)
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA
  temp_data <- long_data_clean
  # ezANOVA's wid needs a participant identifier; build a numeric id from pID
  temp_data$id <- as.numeric(as.factor(temp_data$pID))

  # Run ezANOVA to get sphericity tests (Mauchly table in $Mauchly)
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)

  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)

}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})

# Test sphericity for TIME (2 levels - within-subjects)
# NOTE(review): with only 2 levels sphericity holds trivially, so ezANOVA is
# expected to return no Mauchly table here. temp_data comes from the previous
# tryCatch; if that block failed early, this one fails on a missing object.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)

  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)

}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})

# Test sphericity for TIME × ITEM interaction
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)

  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)

}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================

print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")

# Within-subjects ANOVA via ezANOVA; it reports Mauchly's test and the GG/HF
# corrections alongside the uncorrected table.
# NOTE(review): TEMPORAL_DO is not passed as a between factor here, so these
# corrected tests ignore the between-subjects grouping — confirm intentional.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)

print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)

# The Mauchly element also carries the epsilon estimates behind the corrections.
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)

print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))

# ezANOVA only returns `Sphericity Corrections` when Mauchly's test could be
# computed for at least one effect.
if (!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)

  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")

  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  anova_table <- ez_corrected$ANOVA

  # Align each corrected effect with its row of the uncorrected table ONCE,
  # instead of re-running match() for every derived column.
  row_idx <- match(sphericity_corr$Effect, anova_table$Effect)
  dfn <- anova_table$DFn[row_idx]
  dfd <- anova_table$DFd[row_idx]

  # Corrected df = original df scaled by the epsilon estimate.
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = dfn,
    Original_DFd = dfd,
    GG_DFn = dfn * sphericity_corr$GGe,
    GG_DFd = dfd * sphericity_corr$GGe,
    HF_DFn = dfn * sphericity_corr$HFe,
    HF_DFd = dfd * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for (i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    # FIX: the df columns are doubles; sprintf("%d", x) errors in R whenever
    # x is not integer-valued, so format with %.0f which is safe either way.
    cat(sprintf(" Original: F(%.0f, %.0f) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
|
||||||
|
|
||||||
|
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================

print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")

tryCatch({
  # Reshape to wide: one column per TIME x ITEM cell, one row per participant.
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")

  # FIX: the repeated-measures column names were hard-coded to labels
  # ("Past_Preferences", ...) that pivot_wider never produces for this data,
  # so the missing-columns branch always fired. Build the names from the
  # actual factor levels instead, mirroring the names_from/names_sep
  # convention above ("<TIME>_<ITEM>").
  within_vars <- as.vector(outer(levels(long_data_clean$TIME),
                                 levels(long_data_clean$ITEM),
                                 paste, sep = "_"))

  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if (length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Matrix of repeated measures: rows = participants, cols = cells.
    rm_design <- as.matrix(wide_data[, within_vars])

    # NOTE(review): epsilon() is assumed to come from an attached package —
    # car itself does not export a function of this name; verify it exists,
    # otherwise this whole tryCatch falls through to the error handler.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")

    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))

    # Standard rule of thumb: GG epsilon below .75 -> use GG; otherwise HF.
    if (epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if (epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }

    # =========================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =========================================================================

    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")

    print("Applying Greenhouse-Geisser corrections to ITEM effects:")

    # Numerator df for ITEM = (number of ITEM levels - 1).
    # NOTE(review): the denominator df 4244 is hard-coded from a prior aov
    # run — re-check it against the current output before reporting.
    item_dfn <- length(levels(long_data_clean$ITEM)) - 1
    item_df_corrected_gg <- item_dfn * epsilon_gg
    item_df_corrected_hf <- item_dfn * epsilon_hf

    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # TIME x ITEM shares the same numerator df because TIME has 2 levels:
    # (2 - 1) * (levels(ITEM) - 1).
    interaction_df_corrected_gg <- item_dfn * epsilon_gg
    interaction_df_corrected_hf <- item_dfn * epsilon_hf

    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }
}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================

print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")

print("Effect Sizes from aov() model:")
tryCatch({
  # NOTE(review): effectsize::eta_squared treats `generalized` as overriding
  # `partial` — confirm generalized eta squared is what is wanted here.
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  # FIX: round() on the whole table errors (Math.data.frame) because it
  # contains character columns such as the effect names; round numeric
  # columns only, so the table actually prints instead of the error handler.
  aov_effects[] <- lapply(aov_effects,
                          function(col) if (is.numeric(col)) round(col, 5) else col)
  print(aov_effects)
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})

print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA already reports generalized eta squared in its `ges` column,
  # so no conversion is needed (a former no-op self-assignment was removed).
  ez_effects <- ez_corrected$ANOVA
  print("Generalized Eta Squared from ezANOVA:")
  # FIX: round only the numeric ges column; Effect is character and made the
  # old round() call error out.
  print(data.frame(Effect = ez_effects$Effect, ges = round(ez_effects$ges, 5)))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})

# Kept for reference: the aov summary object (not otherwise used below).
anova_summary <- summary(mixed_anova_model)

# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated-measures design, Type III ANOVA with the
# sphericity corrections above is the appropriate approach. Mixed models
# (lmer) would be preferred for unbalanced designs, structured missingness,
# nested random effects, or very large/complex data — none of which apply.
|
||||||
|
|
||||||
|
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================

print("\n=== POST-HOC COMPARISONS ===")

# --- Main effect of TIME -----------------------------------------------------
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)

# --- Main effect of ITEM -----------------------------------------------------
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)

# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================

# --- TIME x ITEM cell means and simple effects -------------------------------
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)

# ITEM contrasts computed separately at each TIME level.
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)

# TIME contrasts computed separately at each ITEM level.
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================

# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================

print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")

# For every significant (p < .05) contrast in pairs_df, recompute Cohen's d
# from the raw observations in `data`.
#   pairs_df     : data frame of emmeans contrasts (needs `contrast`, `p.value`)
#   data         : long-format raw data
#   group1_var   : factor whose levels appear in the contrast label
#   group2_var   : conditioning factor (present as a column of pairs_df when
#                  simple effects were computed with `by = group2_var`)
#   response_var : name of the numeric response column
# NOTE(review): cohen.d() here treats the two cells as independent samples,
# although TIME/ITEM are within-subject factors — confirm this is intended.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  sig_rows <- pairs_df[pairs_df$p.value < 0.05, ]

  if (nrow(sig_rows) == 0) {
    cat("No significant pairwise comparisons found.\n")
    return(invisible(NULL))
  }

  cat("Significant pairwise comparisons (p < 0.05):\n")
  print(sig_rows)

  cat("\nCohen's d calculated from raw data:\n")

  for (row_i in seq_len(nrow(sig_rows))) {
    this_row <- sig_rows[row_i, ]
    contrast_name <- as.character(this_row$contrast)

    # emmeans labels contrasts as "<levelA> - <levelB>".
    parts <- strsplit(contrast_name, " - ")[[1]]
    if (length(parts) != 2) next

    lvl_a <- trimws(parts[1])
    lvl_b <- trimws(parts[2])

    # When the contrast was conditioned on group2_var ("by ="), restrict the
    # raw data to that conditioning level as well.
    if (group2_var %in% colnames(this_row)) {
      cond_level <- as.character(this_row[[group2_var]])
      keep_cond <- data[[group2_var]] == cond_level
      x1 <- data[[response_var]][data[[group1_var]] == lvl_a & keep_cond]
      x2 <- data[[response_var]][data[[group1_var]] == lvl_b & keep_cond]
    } else {
      x1 <- data[[response_var]][data[[group1_var]] == lvl_a]
      x2 <- data[[response_var]][data[[group1_var]] == lvl_b]
    }

    if (length(x1) > 0 && length(x2) > 0) {
      # effsize::cohen.d returns the estimate plus a qualitative magnitude.
      d_res <- cohen.d(x1, x2)

      cat(sprintf("Comparison: %s", contrast_name))
      if (group2_var %in% colnames(this_row)) {
        cat(sprintf(" | %s", cond_level))
      }
      cat(sprintf("\n n1 = %d, n2 = %d\n", length(x1), length(x2)))
      cat(sprintf(" Cohen's d: %.5f\n", d_res$estimate))
      cat(sprintf(" Effect size interpretation: %s\n", d_res$magnitude))
      cat(sprintf(" p-value: %.5f\n", this_row$p.value))
      cat("\n")
    }
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): the header says DOMAIN but the factor used below is ITEM, and
# the quoted p-value comes from a prior run — verify both against current
# output.

print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")

# Effect sizes for TIME differences within each ITEM level.
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")

# Effect sizes for ITEM differences within each TIME level.
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
|
||||||
|
|
||||||
|
|
||||||
765
eohi1/mixed anova - values.r
Normal file
@ -0,0 +1,765 @@
|
|||||||
|
# Mixed ANOVA Analysis for Values Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Variables: NPastDiff_val_obey, NPastDiff_val_trad, NPastDiff_val_opinion, NPastDiff_val_performance, NPastDiff_val_justice
#            NFutDiff_val_obey, NFutDiff_val_trad, NFutDiff_val_opinion, NFutDiff_val_performance, NFutDiff_val_justice

# --- Libraries ---------------------------------------------------------------
library(tidyverse)
library(ez)          # ezANOVA / sphericity tests
library(car)         # Levene's test
library(afex)        # aov_ez (cleaner ANOVA output)
library(nortest)     # Anderson-Darling normality test
library(emmeans)     # post-hoc comparisons
library(purrr)       # map functions
library(effsize)     # Cohen's d
library(effectsize)  # effect size calculations

# Disable scientific notation in printed output.
options(scipen = 999)

# Sum-to-zero contrasts are required for meaningful Type III tests.
options(contrasts = c("contr.sum", "contr.poly"))

# NOTE(review): machine-specific absolute path — breaks on other machines.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Raw experiment data, one row per participant.
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# --- Basic dataset checks ----------------------------------------------------
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))

# The ten values-item difference scores this script analyses (5 items x 2 times).
required_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
                   "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice")

# Warn (but do not stop) if any expected column is absent.
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required values item variables found!")
}
|
||||||
|
|
||||||
|
# Lookup table mapping each raw column name to its TIME and ITEM labels.
item_mapping <- data.frame(
  variable = c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
               "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice"),
  time = c(rep("Past", 5), rep("Future", 5)),
  item = rep(c("obey", "trad", "opinion", "performance", "justice"), 2),
  stringsAsFactors = FALSE
)

# Reshape wide -> long: one row per participant x TIME x ITEM. Factor labels
# are attached from item_mapping and missing responses are dropped.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(item_mapping, by = "variable") %>%
  mutate(
    # Pin level order so "Past" precedes "Future" in every output table.
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("obey", "trad", "opinion", "performance", "justice")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))

print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
|
||||||
|
|
||||||
|
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================

# Cell-level summaries for every TIME x ITEM combination.
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)

# Same summaries, additionally split by the between-subjects factor.
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================

# Drop rows without a response before any assumption checks.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))

# 1. How much is missing in each TIME x ITEM cell?
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print("Missing values by TIME and ITEM:")
print(missing_summary)

# 2. Outliers per cell, flagged with the 1.5 * IQR fence rule.
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print("Outlier summary (IQR method):")
print(outlier_summary)
|
||||||
|
|
||||||
|
# 3. Anderson-Darling normality test per TIME x ITEM cell.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )

print("Anderson-Darling normality test results:")
# Round numeric columns only (TIME/ITEM are factors).
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)

# 4. Levene's test for homogeneity of variance.
# Across TIME, separately within each ITEM level.
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)

# Across ITEM, separately within each TIME level.
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
|
||||||
|
|
||||||
|
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================

# Hartley's F-max statistic: ratio of the largest to the smallest group
# variance (NAs ignored). Values near 1 indicate homogeneous variances.
calculate_hartley_ratio <- function(variances) {
  biggest <- max(variances, na.rm = TRUE)
  smallest <- min(variances, na.rm = TRUE)
  biggest / smallest
}
|
||||||
|
|
||||||
|
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For a mixed design, homogeneity is checked across the BETWEEN-subjects
# factor (TEMPORAL_DO) inside every within-subjects cell (TIME x ITEM).

# Sanity-check the between-subjects coding before relying on its labels.
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))

print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Variance of each TEMPORAL_DO group inside this TIME x ITEM cell.
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # F-max = larger variance over the smaller one.
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# Bootstrap critical value for Hartley's F-max test.
#
# Resamples each group independently (with replacement), recomputes the
# max/min variance ratio on every iteration, and returns the 95th percentile
# of the bootstrap distribution as the critical value.
#
#   data         : data frame (or tibble) with a grouping and a response column
#   group_var    : name of the grouping column
#   response_var : name of the numeric response column
#   n_iter       : number of bootstrap iterations
# Returns list(observed_ratio, critical_95, n_valid_iterations).
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # FIX: extract the response as a plain vector up front. The old code used
  # data[data[[group_var]] == g, response_var], which returns a one-column
  # tibble (not a vector) when `data` is a tibble — e.g. when called from
  # dplyr::pick() — corrupting sample() and var() below.
  response <- data[[response_var]]
  groups <- unique(data[[group_var]])

  # Per-group observations with NAs dropped, in first-appearance order.
  group_data_list <- lapply(groups, function(g) {
    vals <- response[data[[group_var]] == g]
    vals[!is.na(vals)]
  })

  # Observed per-group variances; clamp non-positive/NA variances to a tiny
  # positive value so the ratio stays defined.
  observed_vars <- vapply(group_data_list, function(v) var(v, na.rm = TRUE), numeric(1))
  observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10

  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocated bootstrap loop: resample each group independently,
  # preserving its sample size.
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    sample_vars <- vapply(group_data_list, function(v) {
      var(sample(v, size = length(v), replace = TRUE), na.rm = TRUE)
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Discard degenerate iterations (zero variance -> Inf/NaN ratios).
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value = 95th percentile of the bootstrap distribution.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  )
}
|
||||||
|
|
||||||
|
# Run the bootstrap F-max test across TEMPORAL_DO in every TIME x ITEM cell.
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123)  # reproducible bootstrap draws

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # pick() hands the grouped slice of the two needed columns to the helper.
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # An observed ratio beyond the bootstrap 95th percentile flags
    # heterogeneous variances in that cell.
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================

# Cross-tabulations to eyeball missing-data patterns and balance
# (top-level expressions, so they auto-print when the script is run).
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
xtabs(~ pID + TIME + ITEM, data = long_data_clean)

# Dimensions and factor structure going into the ANOVA.
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))

complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))

# Every participant should contribute exactly one observation per cell.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)

print(summary(as.vector(design_balance)))

# List participants missing any of the 10 TIME x ITEM combinations.
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10,  # 2 TIME x 5 ITEM
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )

print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
|
||||||
|
|
||||||
|
# Mixed ANOVA using aov() — traditional approach.
#   Between-subjects : TEMPORAL_DO (2 levels: 01PAST, 02FUT)
#   Within-subjects  : TIME (2 levels: Past, Future) × ITEM (5 levels: obey,
#                      trad, opinion, performance, justice)
mixed_anova_model <- aov(
  MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID / (TIME * ITEM)),
  data = long_data_clean
)

print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))

# Alternative fit with afex::aov_ez for cleaner, correction-aware output.
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(
  id      = "pID",
  dv      = "MEAN_DIFFERENCE",
  data    = long_data_clean,
  between = "TEMPORAL_DO",
  within  = c("TIME", "ITEM")
)

print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
|
||||||
|
|
||||||
|
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Mauchly's tests via ezANOVA; each factor's test is isolated in a tryCatch
# so a single failure cannot stop the script.

print("\n=== SPHERICITY TESTS ===")

# --- ITEM (5 within-subject levels) ------------------------------------------
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # ezANOVA prefers a numeric subject identifier; derive one from pID.
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))

  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)

  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)
}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})

# --- TIME (2 within-subject levels; sphericity holds trivially) --------------
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  # NOTE(review): relies on temp_data created in the previous tryCatch.
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)

  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})

# --- TIME x ITEM interaction -------------------------------------------------
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)

  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================

print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")

# Get corrected results from ezANOVA
# NOTE(review): unlike mixed_anova_model above, this call omits the
# between-subjects factor TEMPORAL_DO, making it a purely within-subjects
# analysis -- confirm that is intended.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)

print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)

# Show epsilon values for sphericity corrections
# NOTE(review): $Mauchly holds the Mauchly W tests; the epsilon estimates
# themselves live in $`Sphericity Corrections` (used further below), so this
# label is slightly misleading.
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)

# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
|
||||||
|
|
||||||
|
# Check if sphericity corrections are available; if so, report corrected
# degrees of freedom and corrected F-tests, otherwise explain why not.
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")

  # The sphericity corrections (epsilon estimates) and the uncorrected table
  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  anova_table <- ez_corrected$ANOVA

  # Row positions of the corrected effects in the ANOVA table, computed ONCE
  # instead of repeating match() for every column as before.
  idx <- match(sphericity_corr$Effect, anova_table$Effect)

  # Corrected df = original df multiplied by the epsilon estimate
  # (GGe = Greenhouse-Geisser, HFe = Huynh-Feldt).
  corrected_df <- data.frame(
    Effect       = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[idx],
    Original_DFd = anova_table$DFd[idx],
    GG_DFn       = anova_table$DFn[idx] * sphericity_corr$GGe,
    GG_DFd       = anova_table$DFd[idx] * sphericity_corr$GGe,
    HF_DFn       = anova_table$DFn[idx] * sphericity_corr$HFe,
    HF_DFd       = anova_table$DFd[idx] * sphericity_corr$HFe,
    GG_epsilon   = sphericity_corr$GGe,
    HF_epsilon   = sphericity_corr$HFe
  )

  print(corrected_df)

  # Also show the corrected F-values and p-values with degrees of freedom
  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for(i in 1:nrow(corrected_df)) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    # BUG FIX: the original used %d for Original_DFn/DFd, but these are
    # numeric (double) columns and sprintf("%d", <double>) is a runtime error
    # in R; %.0f prints them as the whole numbers they are.
    cat(sprintf("  Original: F(%.0f, %.0f) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
|
||||||
|
|
||||||
|
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================

print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")

# Create a wide-format data for car package (library already loaded)

tryCatch({
  # Convert to wide format: one column per TIME_ITEM cell, one row per subject
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")

  # Create the repeated measures design
  # NOTE(review): these names contradict the ITEM levels stated earlier
  # (read, music, tv, nap, travel => 2 x 5 = 10 columns such as "Past_read"),
  # so the missing-column check below is likely to trigger -- verify.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")

  # Check if all columns exist before indexing into wide_data
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Subjects x cells matrix for the repeated-measures design
    rm_design <- as.matrix(wide_data[, within_vars])

    # Calculate epsilon values
    # NOTE(review): car does not export an epsilon() function; unless
    # epsilon() is defined elsewhere in this project, these calls throw and
    # are swallowed by the enclosing tryCatch -- confirm its source.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")

    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))

    # Interpretation: conventional rule of thumb around epsilon = .75
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }

    # =========================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =========================================================================

    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")

    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")

    # ITEM main effect (DFn = 4, DFd = 4244)
    # NOTE(review): 4 and 4244 are hard-coded from a previous run's output
    # and go stale if the data change.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf

    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf

    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }

}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================

# Effect size calculations (library already loaded)

print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")

# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
  # Extract effect sizes from aov model
  # NOTE(review): in effectsize::eta_squared, `partial` and `generalized`
  # select different statistics; passing both may warn or let one take
  # precedence -- confirm which measure is wanted.
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  # BUG FIX: round() errors on data frames containing character columns
  # (the effect labels); round only the numeric columns before printing.
  num_cols <- sapply(aov_effects, is.numeric)
  aov_effects[num_cols] <- round(aov_effects[num_cols], 5)
  print(aov_effects)
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})

# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA already reports generalized eta squared in the `ges` column.
  # (The original re-assigned ez_effects$ges to itself -- a no-op, removed.)
  ez_effects <- ez_corrected$ANOVA
  print("Generalized Eta Squared from ezANOVA:")
  # BUG FIX: as above, round only the numeric ges column, not the
  # character Effect column.
  ez_effects$ges <- round(ez_effects$ges, 5)
  print(ez_effects[, c("Effect", "ges")])
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})

# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
anova_summary <- summary(mixed_anova_model)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# NOTE: MIXED MODELS (LMER) NOT NEEDED
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# For this balanced repeated measures design, Type III ANOVA with proper
|
||||||
|
# sphericity corrections (implemented above) is the most appropriate approach.
|
||||||
|
# Mixed models (lmer) are typically used for:
|
||||||
|
# - Unbalanced designs
|
||||||
|
# - Missing data patterns
|
||||||
|
# - Nested random effects
|
||||||
|
# - Large, complex datasets
|
||||||
|
#
|
||||||
|
# Your design is balanced and complete, making Type III ANOVA optimal.
|
||||||
|
|
||||||
|
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================

# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")

# Main effect of TIME: estimated marginal means averaged over the other factors
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
# Bonferroni-adjusted pairwise contrasts between TIME levels
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)

# Main effect of ITEM
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
# All pairwise ITEM contrasts, Bonferroni-adjusted
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================

# TIME × ITEM Interaction: cell means for every TIME x ITEM combination
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)

# Pairwise ITEM contrasts computed separately within each TIME level
# (consumed later by the Cohen's d section)
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)

# Pairwise TIME contrasts computed separately within each ITEM level
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================

# Cohen's d calculations (library already loaded)

print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")

# Function to calculate Cohen's d for pairwise comparisons
#
# Given a data frame of emmeans pairwise contrasts (as produced by
# as.data.frame(pairs(...))), recompute Cohen's d from the raw data for every
# contrast with p < .05 and print it alongside the adjusted p-value.
#
# Args:
#   pairs_df:     contrast data frame; must have `contrast` and `p.value`
#                 columns, and optionally a column named after group2_var
#                 (present when contrasts were computed with by = group2_var).
#   data:         raw long-format data frame to pull observations from.
#   group1_var:   factor whose levels appear in the contrast strings
#                 (e.g. "TIME" for a contrast like "Past - Future").
#   group2_var:   conditioning ("by") factor name, if any.
#   response_var: numeric response column name.
#
# Side effects: prints to the console; no meaningful return value.
#
# NOTE(review): the contrast label is split on " - ", so this breaks if a
# factor level itself contains " - ". It also assumes the level labels in the
# contrast strings match the values stored in data[[group1_var]] -- verify
# against the actual factor labels.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  # Keep only contrasts that reached significance
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label "level1 - level2" back into its two levels
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions; when the contrast was computed
        # within levels of group2_var, also condition on that level
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        # Silently skipped when either group is empty (e.g. label mismatch)
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================

print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")

# Get simple effects of TIME within each ITEM
# (time_item_simple2 was computed above with pairs(..., by = "ITEM"))
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")

# Get simple effects of ITEM within each TIME
# (time_item_simple was computed above with pairs(..., by = "TIME"))
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
|
||||||
|
|
||||||
|
|
||||||
BIN
eohi1/normality_plots.pdf
Normal file
10
eohi1/pearson_correlations.csv
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
"","NPast_mean_total","NFut_mean_total","DGEN_past_mean","DGEN_fut_mean","domain_mean","DGEN_mean","AOT_total","CRT_correct","CRT_int"
|
||||||
|
"NPast_mean_total",1,0.58407,0.37645,0.19034,0.91767,0.33077,0.04531,-0.00144,-0.01053
|
||||||
|
"NFut_mean_total",0.58407,1,0.31658,0.32376,0.85851,0.37059,-0.04928,-0.074,0.05859
|
||||||
|
"DGEN_past_mean",0.37645,0.31658,1,0.77074,0.3928,0.92613,-0.20675,-0.04166,0.0038
|
||||||
|
"DGEN_fut_mean",0.19034,0.32376,0.77074,1,0.27874,0.92466,-0.28285,-0.0818,0.03967
|
||||||
|
"domain_mean",0.91767,0.85851,0.3928,0.27874,1,0.39038,0.0045,-0.03713,0.02203
|
||||||
|
"DGEN_mean",0.33077,0.37059,0.92613,0.92466,0.39038,1,-0.23213,-0.05422,0.01483
|
||||||
|
"AOT_total",0.04531,-0.04928,-0.20675,-0.28285,0.0045,-0.23213,1,0.24279,-0.20318
|
||||||
|
"CRT_correct",-0.00144,-0.074,-0.04166,-0.0818,-0.03713,-0.05422,0.24279,1,-0.89661
|
||||||
|
"CRT_int",-0.01053,0.05859,0.0038,0.03967,0.02203,0.01483,-0.20318,-0.89661,1
|
||||||
|
232
eohi1/regression e1 - edu x ehi.r
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
# Disable scientific notation in printed output
options(scipen = 999)

# dplyr: data wrangling; car: vif()/outlierTest(); lmtest: dwtest()/coeftest();
# stargazer: formatted regression tables; sandwich: robust (HC) covariance.
# BUG FIX: library(lmtest) was loaded twice; the duplicate call was removed.
library(dplyr)
library(car)
library(lmtest)
library(stargazer)
library(sandwich)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
df <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
data <- df %>%
|
||||||
|
select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>%
|
||||||
|
mutate(demo_edu = as.factor(demo_edu))
|
||||||
|
|
||||||
|
# examine data object
|
||||||
|
str(data)
|
||||||
|
colSums(is.na(data))
|
||||||
|
sapply(data, class)
|
||||||
|
levels(data$demo_edu)
|
||||||
|
|
||||||
|
data$demo_edu <- factor(data$demo_edu, levels = c(
|
||||||
|
"High School (or equivalent)",
|
||||||
|
"Trade School (non-military)",
|
||||||
|
"College Diploma/Certificate",
|
||||||
|
"University - Undergraduate",
|
||||||
|
"University - Graduate (Masters)",
|
||||||
|
"University - PhD",
|
||||||
|
"Professional Degree (ex. JD/MD)"
|
||||||
|
))
|
||||||
|
|
||||||
|
levels(data$demo_edu)
|
||||||
|
# Build one 0/1 indicator column per education level (no intercept), give the
# columns readable names, and append every level except the High School
# reference to `data`. Column order follows the factor level order set above.
indicator_df <- as.data.frame(model.matrix(~ demo_edu - 1, data = data))
edu_names <- c(
  "edu_highschool",    # reference level, excluded from the model data below
  "edu_trade",
  "edu_college",
  "edu_uni_undergrad",
  "edu_uni_masters",
  "edu_uni_phd",
  "edu_prof"
)
colnames(indicator_df) <- edu_names

# Attach only the non-reference indicators, so `data` ends up with
# edu_trade .. edu_prof and no edu_highschool column.
data <- cbind(data, indicator_df[, setdiff(edu_names, "edu_highschool")])
|
||||||
|
|
||||||
|
#### MODEL 1 - DGEN ####

# EOHI-DGEN regressed on the six education dummies (High School = reference)
model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad +
                   edu_uni_masters + edu_uni_phd + edu_prof, data = data)

# Model 1 diagnostics
par(mfrow = c(2, 2))
plot(model_DGEN, which = 1) # Residuals vs Fitted
plot(model_DGEN, which = 2) # Normal Q-Q, normality
hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals")
shapiro.test(residuals(model_DGEN))

plot(model_DGEN, which = 3) # Scale-Location
plot(model_DGEN, which = 4) # Cook's Distance

# Model 1 specific tests
vif(model_DGEN) # Multicollinearity
dwtest(model_DGEN) # Independence
outlierTest(model_DGEN) # Outliers

# Look at the specific influential cases
# NOTE(review): hard-coded row numbers taken from a previous outlierTest run;
# they go stale if the data change.
data[c(670, 388, 760), ]

# 6 outliers: 670, 388, 760, 258, 873, 1030; acknowledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable.
# heteroscedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity.
# normality violated but sample size is robust to violation
# no multicollinearity
# no autocorrelation (samples are independent)

# results
print(summary(model_DGEN))
print(AIC(model_DGEN))
|
||||||
|
|
||||||
|
# Create a nice formatted table (report = "vcsp": values, coefficients,
# standard errors, p-values)
stargazer(model_DGEN, type = "text",
          title = "Regression Results: Education and EOHI-DGEN",
          dep.var.labels = "EOHI-DGEN Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp",
          add.lines = list(c("AIC", round(AIC(model_DGEN), 2))))

# Use robust standard errors (doesn't change coefficients, just SEs)
# HC3 heteroscedasticity-consistent covariance from the sandwich package
modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3"))

stargazer(modelDGEN_robust, type = "text",
          title = "Regression Results: Education and EOHI-DGEN",
          dep.var.labels = "EOHI-DGEN Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp")
|
||||||
|
|
||||||
|
|
||||||
|
#### MODEL 2 - DOMAIN ####

# EHI global mean regressed on the same six education dummies
model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad +
                     edu_uni_masters + edu_uni_phd + edu_prof, data = data)

# ASSUMPTION CHECKING FOR MODEL 2 (model_domain)
plot(model_domain, which = 1) # Residuals vs Fitted

plot(model_domain, which = 2) # Normal Q-Q, normality
hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals")
shapiro.test(residuals(model_domain))

plot(model_domain, which = 3) # Scale-Location
plot(model_domain, which = 4) # Cook's Distance

# Model 2 specific tests
vif(model_domain) # Multicollinearity
dwtest(model_domain) # Independence
outlierTest(model_domain) # Outliers

# Check if the autocorrelation is real or artifactual
# Plot residuals against observation order
plot(residuals(model_domain), type = "l")
abline(h = 0, col = "red")

# 6 outliers: acknowledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable.
# heteroscedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity.
# normality violated but sample size is robust to violation
# no multicollinearity
# autocorrelation is significant, may be due to aggregated measure of multiple repeated measures

# Reset plotting to 1x1
# par(mfrow = c(1, 1))

print(summary(model_domain))
print(AIC(model_domain))

stargazer(model_domain, type = "text",
          title = "Regression Results: Education and EHI Domain",
          dep.var.labels = "EHI Domain Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp", # This shows coefficients, SEs, and p-values
          add.lines = list(c("AIC", round(AIC(model_domain), 2))))

# Use robust standard errors (doesn't change coefficients, just SEs)
modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3"))

stargazer(modelDOMAIN_robust, type = "text",
          title = "Regression Results: Education and EHI Domain",
          dep.var.labels = "EHI Domain Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp")
|
||||||
|
|
||||||
|
#### PLOTS ####

# ggplot2 for plotting; dplyr is already attached at the top of this script,
# so the duplicate library(dplyr) call was removed.
library(ggplot2)

# Mean and 95% confidence interval (normal approximation, mean +/- 1.96*SE)
# of EOHI-DGEN per education level
edu_summary_DGEN <- data %>%
  group_by(demo_edu) %>%
  summarise(
    mean_DGEN = mean(eohiDGEN_mean, na.rm = TRUE),
    n = n(),
    se_DGEN = sd(eohiDGEN_mean, na.rm = TRUE) / sqrt(n()),
    ci_lower_DGEN = mean_DGEN - 1.96 * se_DGEN,
    ci_upper_DGEN = mean_DGEN + 1.96 * se_DGEN
  )

# Same summary for the EHI Domain outcome
edu_summary_domain <- data %>%
  group_by(demo_edu) %>%
  summarise(
    mean_domain = mean(ehi_global_mean, na.rm = TRUE),
    n = n(),
    se_domain = sd(ehi_global_mean, na.rm = TRUE) / sqrt(n()),
    ci_lower_domain = mean_domain - 1.96 * se_domain,
    ci_upper_domain = mean_domain + 1.96 * se_domain
  )

# Plot 1: EOHI-DGEN means with confidence intervals
p1 <- ggplot(edu_summary_DGEN, aes(x = demo_edu, y = mean_DGEN)) +
  geom_point(size = 3, color = "steelblue") +
  geom_errorbar(aes(ymin = ci_lower_DGEN, ymax = ci_upper_DGEN),
                width = 0.1, color = "steelblue", linewidth = 1) +
  labs(
    title = "Mean EOHI-DGEN by Education Level",
    subtitle = "Error bars show 95% confidence intervals",
    x = "Education Level",
    y = "Mean EOHI-DGEN Score"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )

# Plot 2: EHI Domain means with confidence intervals
p2 <- ggplot(edu_summary_domain, aes(x = demo_edu, y = mean_domain)) +
  geom_point(size = 3, color = "darkgreen") +
  geom_errorbar(aes(ymin = ci_lower_domain, ymax = ci_upper_domain),
                width = 0.1, color = "darkgreen", linewidth = 1) +
  labs(
    title = "Mean EHI Domain by Education Level",
    subtitle = "Error bars show 95% confidence intervals",
    x = "Education Level",
    y = "Mean EHI Domain Score"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )

# Display the plots
print(p1)
print(p2)

# Save the plots as 10x6in, 300dpi PNGs in the working directory
ggsave("education_DGEN_means.png", p1, width = 10, height = 6, dpi = 300)
ggsave("education_domain_means.png", p2, width = 10, height = 6, dpi = 300)
|
||||||
320
eohi1/regression e1 - ehi x sex x age.r
Normal file
@ -0,0 +1,320 @@
|
|||||||
|
# Disable scientific notation in printed output
options(scipen = 999)

# dplyr: data wrangling; car: diagnostics; lmtest: dwtest()/coeftest();
# stargazer: formatted tables; sandwich: robust (HC) covariance.
# BUG FIX: library(lmtest) was loaded twice; the duplicate call was removed.
library(dplyr)
library(car)
library(lmtest)
library(stargazer)
library(sandwich)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
df <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
data <- df %>%
|
||||||
|
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>%
|
||||||
|
filter(demo_sex != "Prefer not to say")
|
||||||
|
|
||||||
|
str(data)
|
||||||
|
colSums(is.na(data))
|
||||||
|
sapply(data, class)
|
||||||
|
|
||||||
|
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||||
|
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||||
|
|
||||||
|
# Verify the dummy coding
|
||||||
|
print(table(data$demo_sex, data$sex_dummy))
|
||||||
|
|
||||||
|
#### Descriptives ####

# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))

# Center demo_age_1 (subtract the mean) so that, in the models below, the
# intercept and the sex main effect are evaluated at the sample mean age
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)

# Verify the centering (mean should be ~0)
print(summary(data$age_centered))

# Descriptives for sex (frequency table and proportions)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))

# Descriptives for sex dummy variable (should mirror the table above)
print(table(data$sex_dummy))
|
||||||
|
|
||||||
|
#### REGRESSION MODELS ####

# MODEL 1: Age only - EOHI (DV: eohiDGEN_mean)
age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data)
# Base-R diagnostic panel: residuals vs fitted, QQ, scale-location, leverage
par(mfrow = c(2, 2))
plot(age_DGEN)
# Shapiro-Wilk normality test on residuals (very sensitive at large n)
print(shapiro.test(residuals(age_DGEN)))
print(summary(age_DGEN))
print(AIC(age_DGEN))
|
||||||
|
|
||||||
|
# MODEL 1: Age only - EHI (DV: ehi_global_mean)
age_domain <- lm(ehi_global_mean ~ age_centered, data = data)
# Base-R diagnostic panel for the four standard lm assumption plots
par(mfrow = c(2, 2))
plot(age_domain)
# Shapiro-Wilk normality test on residuals (very sensitive at large n)
print(shapiro.test(residuals(age_domain)))
print(summary(age_domain))
print(AIC(age_domain))
|
||||||
|
|
||||||
|
# MODEL 2: Sex only - EOHI (DV: eohiDGEN_mean)
sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(sex_DGEN)
print(shapiro.test(residuals(sex_DGEN)))
print(summary(sex_DGEN))
print(AIC(sex_DGEN))

# Interpretation of the diagnostic panel above:
# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met.
# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust.
# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n.
# p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage.
# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed.
|
||||||
|
|
||||||
|
# MODEL 2: Sex only - EHI (DV: ehi_global_mean)
sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data)
# Diagnostic panel; interpretation follows the same logic as the sex_DGEN
# model above (see the P1–P4 notes there)
par(mfrow = c(2, 2))
plot(sex_domain)
print(shapiro.test(residuals(sex_domain)))
print(summary(sex_domain))
print(AIC(sex_domain))
|
||||||
|
|
||||||
|
# MODEL 3: Age + Sex + Interaction - EOHI (DV: eohiDGEN_mean)
interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data)
# Diagnostic panel (residuals vs fitted, QQ, scale-location, leverage)
par(mfrow = c(2, 2))
plot(interaction_DGEN)
print(shapiro.test(residuals(interaction_DGEN)))
# Multicollinearity check; centering age keeps the VIFs low
vif_DGEN <- vif(interaction_DGEN)
print(vif_DGEN)
# Largest VIF, reported in the HTML tables later in this script.
# FIX: max_vif_DGEN was referenced in the HTML report section but never
# defined anywhere; define it here from the VIFs just computed.
max_vif_DGEN <- round(max(vif_DGEN), 2)
print(summary(interaction_DGEN))
print(AIC(interaction_DGEN))
|
||||||
|
|
||||||
|
# MODEL 3: Age + Sex + Interaction - EHI (DV: ehi_global_mean)
interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data)
# Diagnostic panel (residuals vs fitted, QQ, scale-location, leverage)
par(mfrow = c(2, 2))
plot(interaction_domain)
print(shapiro.test(residuals(interaction_domain)))
# Multicollinearity check; centering age keeps the VIFs low
vif_domain <- vif(interaction_domain)
print(vif_domain)
# Largest VIF, reported in the HTML tables later in this script.
# FIX: max_vif_domain was referenced in the HTML report section but never
# defined anywhere; define it here from the VIFs just computed.
max_vif_domain <- round(max(vif_domain), 2)
print(summary(interaction_domain))
print(AIC(interaction_domain))
|
||||||
|
|
||||||
|
#### troubleshooting ####

# Clear any existing plots if the graphics device gets stuck
# dev.off()


#### PLOTS ####

# Create visual figures for the age models
library(ggplot2)
|
||||||
|
|
||||||
|
# Figure 1: age_DGEN model — scatter of EOHI-DGEN vs centered age with the
# fitted OLS line and its confidence band
p1 <- ggplot(data, aes(x = age_centered, y = eohiDGEN_mean)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Age and EOHI-DGEN Relationship",
    x = "Age (centered)",
    y = "EOHI-DGEN Mean",
    # NOTE(review): R² is computed from the fitted model, but "p < 0.001" is
    # hard-coded — confirm the label still holds if the data change.
    subtitle = paste("R² =", round(summary(age_DGEN)$r.squared, 3),
                     ", p < 0.001")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
|
||||||
|
|
||||||
|
# Figure 2: age_domain model — scatter of EHI domain mean vs centered age
# with the fitted OLS line and its confidence band
p2 <- ggplot(data, aes(x = age_centered, y = ehi_global_mean)) +
  geom_point(alpha = 0.6, color = "darkgreen") +
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Age and EHI Domain Relationship",
    x = "Age (centered)",
    y = "EHI Domain Mean",
    # NOTE(review): R² is computed from the fitted model, but "p < 0.001" is
    # hard-coded — confirm the label still holds if the data change.
    subtitle = paste("R² =", round(summary(age_domain)$r.squared, 3),
                     ", p < 0.001")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
|
||||||
|
|
||||||
|
# Save the plots (8x6 in at 300 dpi) into the working directory; these
# filenames are embedded in the HTML report generated below
ggsave("age_DGEN_plot.png", p1, width = 8, height = 6, dpi = 300)
ggsave("age_domain_plot.png", p2, width = 8, height = 6, dpi = 300)

# Display the plots
print(p1)
print(p2)
|
||||||
|
|
||||||
|
#### HTML file ####

# Create a comprehensive HTML report grouped by model. Each model section
# embeds its stargazer table (with AIC and max VIF appended as extra rows)
# plus the diagnostic-plot PNGs referenced by filename.
# NOTE(review): the *_assumptions.png files referenced below are never saved
# by this script (plot() draws to the active device only) — confirm they are
# produced elsewhere, or wrap the plot() calls in png()/dev.off().
# NOTE(review): ensure max_vif_DGEN and max_vif_domain (largest VIFs of the
# two interaction models) are defined before this point.
library(htmltools)

# Start HTML document
html_content <- htmltools::div(
  htmltools::h1("Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain"),

  # EOHI-DGEN Models Section
  htmltools::h2("EOHI-DGEN Models"),

  # Age Model
  htmltools::h3("1. Age Model (age_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        age_DGEN,
        type = "html",
        title = "Age Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Age (centered)"),
        report = "vcsp*",  # coefficient, std. error, p-value, stars
        add.lines = list(
          c("AIC", round(AIC(age_DGEN), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "age_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;"),
    htmltools::h4("Model Visualization"),
    htmltools::img(src = "age_DGEN_plot.png", style = "width: 100%; max-width: 800px;")
  ),

  # Sex Model
  htmltools::h3("2. Sex Model (sex_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        sex_DGEN,
        type = "html",
        title = "Sex Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Sex (dummy)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(sex_DGEN), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "sex_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),

  # Interaction Model
  htmltools::h3("3. Interaction Model (interaction_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        interaction_DGEN,
        type = "html",
        title = "Interaction Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(interaction_DGEN), 2)),
          c("Max VIF", max_vif_DGEN)
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "interaction_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),

  # EHI Domain Models Section
  htmltools::h2("EHI Domain Models"),

  # Age Model
  htmltools::h3("1. Age Model (age_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        age_domain,
        type = "html",
        title = "Age Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Age (centered)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(age_domain), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "age_domain_assumptions.png", style = "width: 100%; max-width: 800px;"),
    htmltools::h4("Model Visualization"),
    htmltools::img(src = "age_domain_plot.png", style = "width: 100%; max-width: 800px;")
  ),

  # Sex Model
  htmltools::h3("2. Sex Model (sex_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        sex_domain,
        type = "html",
        title = "Sex Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Sex (dummy)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(sex_domain), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "sex_domain_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),

  # Interaction Model
  htmltools::h3("3. Interaction Model (interaction_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        interaction_domain,
        type = "html",
        title = "Interaction Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(interaction_domain), 2)),
          c("Max VIF", max_vif_domain)
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "interaction_domain_assumptions.png", style = "width: 100%; max-width: 800px;")
  )
)

# Save HTML content
htmltools::save_html(html_content, "regression_analysis_report.html")

print("HTML report created: regression_analysis_report.html")
|
||||||
31
eohi1/regression_DGEN_models.html
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
<table style="text-align:center"><caption><strong>Regression Models: EOHI-DGEN</strong></caption>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
|
||||||
|
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td colspan="3">EOHI-DGEN Mean</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td><td></td><td>-0.018</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.003)</td><td></td><td>(0.005)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td><td></td><td>p = 0.0004<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.099</td><td>0.103</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>(0.107)</td><td>(0.106)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>p = 0.355</td><td>p = 0.331</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>0.006</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.402</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Constant</td><td>0.417</td><td>0.367</td><td>0.364</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.053)</td><td>(0.076)</td><td>(0.075)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td><td>4192.48</td><td>4176.42</td></tr>
|
||||||
|
<tr><td style="text-align:left">Max VIF</td><td>N/A</td><td>N/A</td><td>2.22</td></tr>
|
||||||
|
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
|
||||||
|
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td><td>0.001</td><td>0.020</td></tr>
|
||||||
|
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td><td>-0.0001</td><td>0.017</td></tr>
|
||||||
|
<tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td><td>1.742 (df = 1059)</td><td>1.727 (df = 1057)</td></tr>
|
||||||
|
<tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td><td>0.859 (df = 1; 1059)</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
|
||||||
|
</table>
|
||||||
31
eohi1/regression_EHI_domain_models.html
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
<table style="text-align:center"><caption><strong>Regression Models: EHI Domain</strong></caption>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
|
||||||
|
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td colspan="3">EHI Domain Mean</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.007</td><td></td><td>-0.006</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.001)</td><td></td><td>(0.001)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td></td><td>p = 0.00001<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.031</td><td>0.033</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>(0.031)</td><td>(0.030)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>p = 0.311</td><td>p = 0.274</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>-0.0003</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.002)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.883</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Constant</td><td>0.137</td><td>0.121</td><td>0.120</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.015)</td><td>(0.022)</td><td>(0.021)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00000<sup>***</sup></td><td>p = 0.00000<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1515.75</td><td>1562.88</td><td>1518.52</td></tr>
|
||||||
|
<tr><td style="text-align:left">Max VIF</td><td>NA</td><td>NA</td><td>2.22</td></tr>
|
||||||
|
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
|
||||||
|
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.044</td><td>0.001</td><td>0.045</td></tr>
|
||||||
|
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td><td>0.00003</td><td>0.043</td></tr>
|
||||||
|
<tr><td style="text-align:left">Residual Std. Error</td><td>0.493 (df = 1059)</td><td>0.504 (df = 1059)</td><td>0.494 (df = 1057)</td></tr>
|
||||||
|
<tr><td style="text-align:left">F Statistic</td><td>49.178<sup>***</sup> (df = 1; 1059)</td><td>1.029 (df = 1; 1059)</td><td>16.789<sup>***</sup> (df = 3; 1057)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
|
||||||
|
</table>
|
||||||
31
eohi1/regression_EOHI_models.html
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
<table style="text-align:center"><caption><strong>Regression Models: EOHI-DGEN</strong></caption>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
|
||||||
|
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td colspan="3">EOHI-DGEN Mean</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td><td></td><td>-0.018</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.003)</td><td></td><td>(0.005)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td><td></td><td>p = 0.0004<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.099</td><td>0.103</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>(0.107)</td><td>(0.106)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>p = 0.355</td><td>p = 0.331</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>0.006</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.402</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Constant</td><td>0.417</td><td>0.367</td><td>0.364</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.053)</td><td>(0.076)</td><td>(0.075)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td><td>4192.48</td><td>4176.42</td></tr>
|
||||||
|
<tr><td style="text-align:left">Max VIF</td><td>N/A</td><td>N/A</td><td>2.22</td></tr>
|
||||||
|
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
|
||||||
|
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td><td>0.001</td><td>0.020</td></tr>
|
||||||
|
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td><td>-0.0001</td><td>0.017</td></tr>
|
||||||
|
<tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td><td>1.742 (df = 1059)</td><td>1.727 (df = 1057)</td></tr>
|
||||||
|
<tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td><td>0.859 (df = 1; 1059)</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
|
||||||
|
</table>
|
||||||
62
eohi1/regression_analysis_report.html
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<style>body{background-color:white;}</style>
|
||||||
|
|
||||||
|
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<h1>Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain</h1>
|
||||||
|
<h2>EOHI-DGEN Models</h2>
|
||||||
|
<h3>1. Age Model (age_DGEN)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Age Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td></tr> <tr><td style="text-align:left"></td><td>(0.003)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.417</td></tr> <tr><td style="text-align:left"></td><td>(0.053)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="age_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
<h4>Model Visualization</h4>
|
||||||
|
<img src="age_DGEN_plot.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h3>2. Sex Model (sex_DGEN)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Sex Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Sex (dummy)</td><td>0.099</td></tr> <tr><td style="text-align:left"></td><td>(0.107)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.355</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.367</td></tr> <tr><td style="text-align:left"></td><td>(0.076)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4192.48</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.001</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>-0.0001</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>1.742 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>0.859 (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="sex_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h3>3. Interaction Model (interaction_DGEN)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Interaction Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.018</td></tr> <tr><td style="text-align:left"></td><td>(0.005)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.0004<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Sex (dummy)</td><td>0.103</td></tr> <tr><td style="text-align:left"></td><td>(0.106)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.331</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Age x Sex</td><td>0.006</td></tr> <tr><td style="text-align:left"></td><td>(0.007)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.402</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.364</td></tr> <tr><td style="text-align:left"></td><td>(0.075)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4176.42</td></tr> <tr><td style="text-align:left">Max VIF</td><td>2.22</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.020</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td></tr> <tr><td style="text-align:left">Residual Std. 
Error</td><td>1.727 (df = 1057)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="interaction_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h2>EHI Domain Models</h2>
|
||||||
|
<h3>1. Age Model (age_domain)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Age Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.007</td></tr> <tr><td style="text-align:left"></td><td>(0.001)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.137</td></tr> <tr><td style="text-align:left"></td><td>(0.015)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1515.75</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.044</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>0.493 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>49.178<sup>***</sup> (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="age_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
<h4>Model Visualization</h4>
|
||||||
|
<img src="age_domain_plot.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h3>2. Sex Model (sex_domain)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Sex Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Sex (dummy)</td><td>0.031</td></tr> <tr><td style="text-align:left"></td><td>(0.031)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.311</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.121</td></tr> <tr><td style="text-align:left"></td><td>(0.022)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1562.88</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.001</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.00003</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>0.504 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>1.029 (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="sex_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h3>3. Interaction Model (interaction_domain)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Interaction Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.006</td></tr> <tr><td style="text-align:left"></td><td>(0.001)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Sex (dummy)</td><td>0.033</td></tr> <tr><td style="text-align:left"></td><td>(0.030)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.274</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Age x Sex</td><td>-0.0003</td></tr> <tr><td style="text-align:left"></td><td>(0.002)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.883</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.120</td></tr> <tr><td style="text-align:left"></td><td>(0.021)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1518.52</td></tr> <tr><td style="text-align:left">Max VIF</td><td>2.22</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.045</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td></tr> <tr><td style="text-align:left">Residual Std. 
Error</td><td>0.494 (df = 1057)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>16.789<sup>***</sup> (df = 3; 1057)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="interaction_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
204
eohi1/reliability_analysis_cronbach_alpha.r
Normal file
@ -0,0 +1,204 @@
|
|||||||
|
# Cronbach's Alpha Reliability Analysis
# For 4 domains (preferences, personality, values, life satisfaction) at 2 time points
# 5 items per domain per time point

# Load required libraries
library(psych)
library(dplyr)
library(tidyr)

# Read the data
data <- read.csv("exp1.csv")

# Define the scale variables for each domain and time point.
# Item names follow the pattern N<Time>Diff_<domain>_<item>.

# Past time point scales
past_pref_vars <- c(
  "NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
  "NPastDiff_pref_nap", "NPastDiff_pref_travel"
)

past_pers_vars <- c(
  "NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
  "NPastDiff_pers_anxious", "NPastDiff_pers_complex"
)

past_val_vars <- c(
  "NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
  "NPastDiff_val_performance", "NPastDiff_val_justice"
)

past_life_vars <- c(
  "NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
  "NPastDiff_life_important", "NPastDiff_life_change"
)

# Future time point scales
fut_pref_vars <- c(
  "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
  "NFutDiff_pref_nap", "NFutDiff_pref_travel"
)

fut_pers_vars <- c(
  "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable",
  "NFutDiff_pers_anxious", "NFutDiff_pers_complex"
)

fut_val_vars <- c(
  "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion",
  "NFutDiff_val_performance", "NFutDiff_val_justice"
)

fut_life_vars <- c(
  "NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied",
  "NFutDiff_life_important", "NFutDiff_life_change"
)
|
||||||
|
|
||||||
|
# Function to calculate Cronbach's alpha and return detailed results
|
||||||
|
# Compute Cronbach's alpha for one scale and print a detailed report.
#
# Args:
#   data:       data frame containing the item columns.
#   var_names:  character vector of column names that make up the scale.
#   scale_name: label used in the printed report.
#
# Returns the psych::alpha() result object, or NULL when fewer than
# 3 complete cases are available.
calc_cronbach_alpha <- function(data, var_names, scale_name) {
  # Subset the items and summarise missingness per item.
  scale_data <- data[, var_names]
  missing_info <- data.frame(
    Variable = var_names,
    Missing_Count = colSums(is.na(scale_data)),
    Missing_Percent = round(colSums(is.na(scale_data)) / nrow(scale_data) * 100, 2)
  )

  # Listwise deletion: reliability is computed on complete cases only.
  complete_data <- scale_data[complete.cases(scale_data), ]

  # BUG FIX: the original used '"="*60', which is invalid in R (there is
  # no string-repetition operator; it raises "non-numeric argument to
  # binary operator"). Build the separator with strrep() instead.
  sep <- strrep("=", 60)
  cat("\n", sep, "\n")
  cat("SCALE:", scale_name, "\n")
  cat(sep, "\n")

  cat("Sample size for reliability analysis:", nrow(complete_data), "\n")
  cat("Original sample size:", nrow(data), "\n")
  cat("Cases removed due to missing data:", nrow(data) - nrow(complete_data), "\n\n")

  cat("Missing data summary:\n")
  print(missing_info)

  # Too few complete cases: report and bail out rather than erroring inside alpha().
  if (nrow(complete_data) < 3) {
    cat("\nWARNING: Insufficient complete cases for reliability analysis\n")
    return(NULL)
  }

  # psych::alpha; check.keys = TRUE auto-reverses negatively keyed items.
  alpha_result <- alpha(complete_data, check.keys = TRUE)

  cat("\nCronbach's Alpha Results:\n")
  cat("Raw alpha:", round(alpha_result$total$raw_alpha, 4), "\n")
  cat("Standardized alpha:", round(alpha_result$total$std.alpha, 4), "\n")
  cat("Average inter-item correlation:", round(alpha_result$total$average_r, 4), "\n")
  cat("Number of items:", alpha_result$total$nvar, "\n")

  # Per-item diagnostics (alpha-if-deleted, corrected item-total r, mean, SD).
  cat("\nItem Statistics:\n")
  item_stats <- data.frame(
    Item = var_names,
    Alpha_if_deleted = round(alpha_result$alpha.drop$raw_alpha, 4),
    Item_total_correlation = round(alpha_result$item.stats$r.drop, 4),
    Mean = round(alpha_result$item.stats$mean, 4),
    SD = round(alpha_result$item.stats$sd, 4)
  )
  print(item_stats)

  # Check assumptions
  cat("\nAssumption Checks:\n")

  # 1. Sufficient sample size (minimum 30 recommended).
  sample_size_ok <- nrow(complete_data) >= 30
  cat("Sample size adequate (≥30):", sample_size_ok, "\n")

  # 2. Adequate inter-item correlations (> 0.30); keep upper triangle only,
  #    diagonal excluded, so each pair is counted once.
  inter_item_cors <- cor(complete_data)
  inter_item_cors[lower.tri(inter_item_cors)] <- NA
  diag(inter_item_cors) <- NA
  inter_item_cors_flat <- as.vector(inter_item_cors)
  inter_item_cors_flat <- inter_item_cors_flat[!is.na(inter_item_cors_flat)]
  adequate_cors <- sum(inter_item_cors_flat > 0.30) / length(inter_item_cors_flat)
  cat("Proportion of inter-item correlations > 0.30:", round(adequate_cors, 4), "\n")

  # 3. Negative correlations (concerning for unidimensionality).
  negative_cors <- sum(inter_item_cors_flat < 0, na.rm = TRUE)
  cat("Number of negative inter-item correlations:", negative_cors, "\n")

  # 4. Item variances should be roughly similar.
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat("Ratio of highest to lowest item variance:", round(var_ratio, 4), "\n")

  return(alpha_result)
}
|
||||||
|
|
||||||
|
# Calculate Cronbach's alpha for all scales and summarise the results.

# Extract the rounded raw alpha from an alpha() result, or NA when the
# analysis could not be run (insufficient complete cases).
alpha_or_na <- function(alpha_result) {
  if (is.null(alpha_result)) NA_real_ else round(alpha_result$total$raw_alpha, 4)
}

# Map a raw alpha coefficient to the conventional qualitative label.
# Replaces eight copy-pasted if/else chains in the original.
interpret_alpha <- function(alpha_result) {
  if (is.null(alpha_result)) {
    return("Insufficient data")
  }
  alpha_val <- alpha_result$total$raw_alpha
  if (alpha_val >= 0.90) {
    "Excellent"
  } else if (alpha_val >= 0.80) {
    "Good"
  } else if (alpha_val >= 0.70) {
    "Acceptable"
  } else if (alpha_val >= 0.60) {
    "Questionable"
  } else {
    "Poor"
  }
}

# BUG FIX: the original used '"="*80', which is invalid R (no string
# repetition operator); use strrep() for the banner separator.
sep80 <- strrep("=", 80)

cat("CRONBACH'S ALPHA RELIABILITY ANALYSIS")
cat("\nData: exp1.csv")
cat("\nTotal sample size:", nrow(data))

# Past time point analyses
past_pref_alpha <- calc_cronbach_alpha(data, past_pref_vars, "Past Preferences")
past_pers_alpha <- calc_cronbach_alpha(data, past_pers_vars, "Past Personality")
past_val_alpha <- calc_cronbach_alpha(data, past_val_vars, "Past Values")
past_life_alpha <- calc_cronbach_alpha(data, past_life_vars, "Past Life Satisfaction")

# Future time point analyses
fut_pref_alpha <- calc_cronbach_alpha(data, fut_pref_vars, "Future Preferences")
fut_pers_alpha <- calc_cronbach_alpha(data, fut_pers_vars, "Future Personality")
fut_val_alpha <- calc_cronbach_alpha(data, fut_val_vars, "Future Values")
fut_life_alpha <- calc_cronbach_alpha(data, fut_life_vars, "Future Life Satisfaction")

# Summary table
cat("\n", sep80, "\n")
cat("SUMMARY OF CRONBACH'S ALPHA COEFFICIENTS")
cat("\n", sep80, "\n")

# Named list keeps the scale label next to its result object.
scale_results <- list(
  "Past Preferences" = past_pref_alpha,
  "Past Personality" = past_pers_alpha,
  "Past Values" = past_val_alpha,
  "Past Life Satisfaction" = past_life_alpha,
  "Future Preferences" = fut_pref_alpha,
  "Future Personality" = fut_pers_alpha,
  "Future Values" = fut_val_alpha,
  "Future Life Satisfaction" = fut_life_alpha
)

summary_results <- data.frame(
  Scale = names(scale_results),
  Alpha = vapply(scale_results, alpha_or_na, numeric(1)),
  Items = rep(5, length(scale_results)),  # every scale has 5 items
  Interpretation = vapply(scale_results, interpret_alpha, character(1)),
  row.names = NULL
)

print(summary_results)

# Save results to CSV
write.csv(summary_results, "cronbach_alpha_summary.csv", row.names = FALSE)
cat("\nSummary results saved to: cronbach_alpha_summary.csv\n")

cat("\n", sep80, "\n")
cat("INTERPRETATION GUIDE FOR CRONBACH'S ALPHA")
cat("\n", sep80, "\n")
cat("α ≥ 0.90: Excellent reliability\n")
cat("α ≥ 0.80: Good reliability\n")
cat("α ≥ 0.70: Acceptable reliability\n")
cat("α ≥ 0.60: Questionable reliability\n")
cat("α < 0.60: Poor reliability\n")
|
||||||
|
|
||||||
|
|
||||||
BIN
eohi1/residual_plots.pdf
Normal file
BIN
eohi1/results - exp1 - mixed anova DGEN.md
Normal file
BIN
eohi1/results exp1 - mixed anova - domain means.md
Normal file
BIN
eohi1/sex_DGEN_assumptions.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
eohi1/sex_domain_assumptions.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
14
eohi1/spearman_correlations.csv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
"","eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean","ehi_pref_mean","ehi_pers_mean","ehi_val_mean","ehi_life_mean","ehi_global_mean","AOT_total","CRT_correct","CRT_int"
|
||||||
|
"eohiDGEN_pref",1,0.27683,0.23964,0.24515,0.66949,0.22538,0.09704,0.08007,0.12507,0.21415,0.06836,0.05361,-0.05819
|
||||||
|
"eohiDGEN_pers",0.27683,1,0.31901,0.18576,0.70868,0.21513,0.18684,0.13159,0.12199,0.26738,0.0916,0.02566,-0.01732
|
||||||
|
"eohiDGEN_val",0.23964,0.31901,1,0.13719,0.68261,0.14816,0.12424,0.23448,0.06907,0.21756,0.10396,0.07479,-0.06044
|
||||||
|
"eohiDGEN_life",0.24515,0.18576,0.13719,1,0.25499,0.02053,0.06528,0.04652,0.29721,0.23854,0.11795,0.03949,-0.0013
|
||||||
|
"eohiDGEN_mean",0.66949,0.70868,0.68261,0.25499,1,0.26963,0.17831,0.19688,0.1283,0.31052,0.11102,0.06665,-0.05731
|
||||||
|
"ehi_pref_mean",0.22538,0.21513,0.14816,0.02053,0.26963,1,0.23765,0.19925,0.0625,0.51305,0.0504,0.06892,-0.06473
|
||||||
|
"ehi_pers_mean",0.09704,0.18684,0.12424,0.06528,0.17831,0.23765,1,0.22369,0.12488,0.5826,0.03132,0.05133,-0.04906
|
||||||
|
"ehi_val_mean",0.08007,0.13159,0.23448,0.04652,0.19688,0.19925,0.22369,1,0.02752,0.51294,0.01519,0.00599,-0.03791
|
||||||
|
"ehi_life_mean",0.12507,0.12199,0.06907,0.29721,0.1283,0.0625,0.12488,0.02752,1,0.59399,0.07295,0.01639,-0.00581
|
||||||
|
"ehi_global_mean",0.21415,0.26738,0.21756,0.23854,0.31052,0.51305,0.5826,0.51294,0.59399,1,0.0849,0.05133,-0.05746
|
||||||
|
"AOT_total",0.06836,0.0916,0.10396,0.11795,0.11102,0.0504,0.03132,0.01519,0.07295,0.0849,1,0.2609,-0.20963
|
||||||
|
"CRT_correct",0.05361,0.02566,0.07479,0.03949,0.06665,0.06892,0.05133,0.00599,0.01639,0.05133,0.2609,1,-0.87087
|
||||||
|
"CRT_int",-0.05819,-0.01732,-0.06044,-0.0013,-0.05731,-0.06473,-0.04906,-0.03791,-0.00581,-0.05746,-0.20963,-0.87087,1
|
||||||
|
145
eohi2/EHI reliability.html
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
<html><head><title>EHI Reliability Analysis</title></head><body><h1>EHI Reliability Analysis</h1><h2>Cronbach's Alpha</h2><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> raw_alpha </th>
|
||||||
|
<th style="text-align:right;"> std.alpha </th>
|
||||||
|
<th style="text-align:right;"> G6(smc) </th>
|
||||||
|
<th style="text-align:right;"> average_r </th>
|
||||||
|
<th style="text-align:right;"> S/N </th>
|
||||||
|
<th style="text-align:right;"> ase </th>
|
||||||
|
<th style="text-align:right;"> mean </th>
|
||||||
|
<th style="text-align:right;"> sd </th>
|
||||||
|
<th style="text-align:right;"> median_r </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> </td>
|
||||||
|
<td style="text-align:right;"> 0.5709145 </td>
|
||||||
|
<td style="text-align:right;"> 0.6860723 </td>
|
||||||
|
<td style="text-align:right;"> 0.6450534 </td>
|
||||||
|
<td style="text-align:right;"> 0.3533207 </td>
|
||||||
|
<td style="text-align:right;"> 2.185446 </td>
|
||||||
|
<td style="text-align:right;"> 0.0229978 </td>
|
||||||
|
<td style="text-align:right;"> 0.138301 </td>
|
||||||
|
<td style="text-align:right;"> 0.8228433 </td>
|
||||||
|
<td style="text-align:right;"> 0.3517941 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table><h2>Split-Half Reliability</h2><p>Maximum split half reliability: 0.77987</p><h2>Item-Level Statistics</h2><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> n </th>
|
||||||
|
<th style="text-align:right;"> raw.r </th>
|
||||||
|
<th style="text-align:right;"> std.r </th>
|
||||||
|
<th style="text-align:right;"> r.cor </th>
|
||||||
|
<th style="text-align:right;"> r.drop </th>
|
||||||
|
<th style="text-align:right;"> mean </th>
|
||||||
|
<th style="text-align:right;"> sd </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehiDGEN_5_mean </td>
|
||||||
|
<td style="text-align:right;"> 489 </td>
|
||||||
|
<td style="text-align:right;"> 0.7653498 </td>
|
||||||
|
<td style="text-align:right;"> 0.6789150 </td>
|
||||||
|
<td style="text-align:right;"> 0.5035473 </td>
|
||||||
|
<td style="text-align:right;"> 0.4480613 </td>
|
||||||
|
<td style="text-align:right;"> 0.0620314 </td>
|
||||||
|
<td style="text-align:right;"> 1.4573526 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehiDGEN_10_mean </td>
|
||||||
|
<td style="text-align:right;"> 489 </td>
|
||||||
|
<td style="text-align:right;"> 0.8686359 </td>
|
||||||
|
<td style="text-align:right;"> 0.7381306 </td>
|
||||||
|
<td style="text-align:right;"> 0.6097580 </td>
|
||||||
|
<td style="text-align:right;"> 0.5098844 </td>
|
||||||
|
<td style="text-align:right;"> 0.3006135 </td>
|
||||||
|
<td style="text-align:right;"> 1.8924455 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi5_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 489 </td>
|
||||||
|
<td style="text-align:right;"> 0.5032176 </td>
|
||||||
|
<td style="text-align:right;"> 0.7114339 </td>
|
||||||
|
<td style="text-align:right;"> 0.5651807 </td>
|
||||||
|
<td style="text-align:right;"> 0.3871421 </td>
|
||||||
|
<td style="text-align:right;"> 0.0675187 </td>
|
||||||
|
<td style="text-align:right;"> 0.4620107 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi10_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 489 </td>
|
||||||
|
<td style="text-align:right;"> 0.5705196 </td>
|
||||||
|
<td style="text-align:right;"> 0.7420342 </td>
|
||||||
|
<td style="text-align:right;"> 0.6269733 </td>
|
||||||
|
<td style="text-align:right;"> 0.4474763 </td>
|
||||||
|
<td style="text-align:right;"> 0.1230402 </td>
|
||||||
|
<td style="text-align:right;"> 0.5252241 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table><h3>Alpha if Item Dropped</h3><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> raw_alpha </th>
|
||||||
|
<th style="text-align:right;"> std.alpha </th>
|
||||||
|
<th style="text-align:right;"> G6(smc) </th>
|
||||||
|
<th style="text-align:right;"> average_r </th>
|
||||||
|
<th style="text-align:right;"> S/N </th>
|
||||||
|
<th style="text-align:right;"> alpha se </th>
|
||||||
|
<th style="text-align:right;"> var.r </th>
|
||||||
|
<th style="text-align:right;"> med.r </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehiDGEN_5_mean </td>
|
||||||
|
<td style="text-align:right;"> 0.4125021 </td>
|
||||||
|
<td style="text-align:right;"> 0.6576471 </td>
|
||||||
|
<td style="text-align:right;"> 0.5754825 </td>
|
||||||
|
<td style="text-align:right;"> 0.3903632 </td>
|
||||||
|
<td style="text-align:right;"> 1.920963 </td>
|
||||||
|
<td style="text-align:right;"> 0.0273016 </td>
|
||||||
|
<td style="text-align:right;"> 0.0105384 </td>
|
||||||
|
<td style="text-align:right;"> 0.4162485 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehiDGEN_10_mean </td>
|
||||||
|
<td style="text-align:right;"> 0.4091906 </td>
|
||||||
|
<td style="text-align:right;"> 0.6003996 </td>
|
||||||
|
<td style="text-align:right;"> 0.5190903 </td>
|
||||||
|
<td style="text-align:right;"> 0.3337035 </td>
|
||||||
|
<td style="text-align:right;"> 1.502500 </td>
|
||||||
|
<td style="text-align:right;"> 0.0345773 </td>
|
||||||
|
<td style="text-align:right;"> 0.0161841 </td>
|
||||||
|
<td style="text-align:right;"> 0.2873396 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi5_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 0.5572211 </td>
|
||||||
|
<td style="text-align:right;"> 0.6271436 </td>
|
||||||
|
<td style="text-align:right;"> 0.5440163 </td>
|
||||||
|
<td style="text-align:right;"> 0.3592479 </td>
|
||||||
|
<td style="text-align:right;"> 1.681998 </td>
|
||||||
|
<td style="text-align:right;"> 0.0259296 </td>
|
||||||
|
<td style="text-align:right;"> 0.0113815 </td>
|
||||||
|
<td style="text-align:right;"> 0.4162485 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi10_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 0.5282880 </td>
|
||||||
|
<td style="text-align:right;"> 0.5963510 </td>
|
||||||
|
<td style="text-align:right;"> 0.5041975 </td>
|
||||||
|
<td style="text-align:right;"> 0.3299683 </td>
|
||||||
|
<td style="text-align:right;"> 1.477400 </td>
|
||||||
|
<td style="text-align:right;"> 0.0271341 </td>
|
||||||
|
<td style="text-align:right;"> 0.0068450 </td>
|
||||||
|
<td style="text-align:right;"> 0.2873396 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table></body></html>
|
||||||
1047
eohi2/README_Variable_Creation.txt
Normal file
0
eohi2/RMD - mixed anova DGEN.rmd
Normal file
BIN
eohi2/Rplots.pdf
Normal file
7
eohi2/STD_EHI_correlation_matrix.csv
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
,stdEHI_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
|
||||||
|
sex_dummy,0.079,,,,,
|
||||||
|
demo_age_1,-0.22***,-0.028,,,,
|
||||||
|
edu_num,-0.027,0.0094,-0.078,,,
|
||||||
|
aot_total,-0.028,-0.072,-0.20***,0.10*,,
|
||||||
|
crt_correct,0.11*,-0.16***,-0.0044,0.12**,-0.037,
|
||||||
|
crt_int,-0.12**,0.14**,0.028,-0.11*,-0.011,-0.88***
|
||||||
|
7
eohi2/STD_EHI_correlation_pvalues.csv
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
,stdEHI_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
|
||||||
|
sex_dummy,0.080650819,,,,,
|
||||||
|
demo_age_1,1.40537E-06,0.536013997,,,,
|
||||||
|
edu_num,0.551130545,0.835590798,0.086262901,,,
|
||||||
|
aot_total,0.545115793,0.112852544,5.87906E-06,0.025659604,,
|
||||||
|
crt_correct,0.011928378,0.000483116,0.923101803,0.00884239,0.4122211,
|
||||||
|
crt_int,0.007552737,0.001816488,0.533287266,0.012949289,0.810348767,9.5293E-160
|
||||||
|
100
eohi2/correlation matrix 2 - std ehi.r
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

df <- read.csv("eohi2.csv")

# Keep only the analysis variables and drop respondents who declined to
# report sex.
data <- df %>%
  select(stdEHI_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int) %>%
  filter(demo_sex != "Prefer not to say")

print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)

# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

# ---- Descriptives ----

# Age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))

# Sex: frequency and proportion tables
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))

# Sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to an ordered factor, then to numeric codes (1, 2, 3)
# for use in the correlation matrix.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)

# Check and verify the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### correlation matrix ####

# Numeric variables entering the correlation matrix
numeric_vars <- data %>%
  select(stdEHI_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int)

# Spearman correlations on complete observations
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Significance tests via psych::corr.test (no p-value adjustment)
library(psych)

cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")

print(round(cor_test$r, 3))
print(round(cor_test$p, 3))

# Report every off-diagonal pair with its r and p value (for reporting)
for (i in seq_len(nrow(cor_test$r))) {
  for (j in seq_len(ncol(cor_test$r))) {
    if (i != j) { # Skip diagonal
      cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
          ": r =", round(cor_test$r[i, j], 3),
          ", p =", round(cor_test$p[i, j], 3), "\n")
    }
  }
}

# Summary of significant correlations (p < .05, diagonal excluded)
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (k in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[k, 1]
    col_idx <- sig_cors[k, 2]
    if (row_idx != col_idx) { # Skip diagonal
      cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
          ": r =", round(cor_test$r[row_idx, col_idx], 3),
          ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
    }
  }
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "STD_EHI_correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "STD_EHI_correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to STD_EHI_correlation_matrix.csv")
print("P-values saved to STD_EHI_correlation_pvalues.csv")
|
||||||
63
eohi2/correlationCORRECT_exp2.csv
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
Variable1,Variable2,Spearman_r,P_value
|
||||||
|
ehiDGEN_5_Pers,aot_total,-0.136252077,0.002533005
|
||||||
|
ehi5_global_mean,crt_int,-0.111415172,0.013696124
|
||||||
|
ehi5_pref_MEAN,crt_int,-0.111269786,0.013820353
|
||||||
|
ehi5_pref_MEAN,crt_correct,0.092043471,0.041901166
|
||||||
|
ehi5_global_mean,crt_correct,0.091771139,0.042513801
|
||||||
|
,,,
|
||||||
|
ehi10_global_mean,crt_correct,0.086125328,0.057016745
|
||||||
|
ehi10_global_mean,crt_int,-0.077752437,0.085875618
|
||||||
|
ehi10_global_mean,aot_total,0.038353442,0.397405642
|
||||||
|
ehi10_pers_MEAN,crt_int,-0.078358781,0.083451324
|
||||||
|
ehi10_pers_MEAN,crt_correct,0.062086308,0.170454491
|
||||||
|
ehi10_pers_MEAN,aot_total,0.008627548,0.849074663
|
||||||
|
ehi10_pref_MEAN,crt_correct,0.076608495,0.090603647
|
||||||
|
ehi10_pref_MEAN,crt_int,-0.020948272,0.64400852
|
||||||
|
ehi10_pref_MEAN,aot_total,-0.006633556,0.883671698
|
||||||
|
ehi10_val_MEAN,crt_correct,0.05989196,0.186098498
|
||||||
|
ehi10_val_MEAN,crt_int,-0.057978747,0.200580439
|
||||||
|
ehi10_val_MEAN,aot_total,-0.003835901,0.932573186
|
||||||
|
ehi5.10_global_mean,crt_correct,0.033565462,0.458964083
|
||||||
|
ehi5.10_global_mean,aot_total,-0.031442998,0.487869173
|
||||||
|
ehi5.10_global_mean,crt_int,-0.023334683,0.606722562
|
||||||
|
ehi5.10_pers_MEAN,crt_correct,0.030973863,0.494387766
|
||||||
|
ehi5.10_pers_MEAN,crt_int,-0.02351475,0.603948546
|
||||||
|
ehi5.10_pers_MEAN,aot_total,-0.006387901,0.887950695
|
||||||
|
ehi5.10_pref_MEAN,aot_total,-0.012595122,0.781151691
|
||||||
|
ehi5.10_pref_MEAN,crt_correct,-0.011596204,0.798118255
|
||||||
|
ehi5.10_pref_MEAN,crt_int,0.00927269,0.837939503
|
||||||
|
ehi5.10_val_MEAN,crt_int,-0.02343742,0.60513915
|
||||||
|
ehi5.10_val_MEAN,aot_total,-0.01868822,0.680165418
|
||||||
|
ehi5.10_val_MEAN,crt_correct,0.016882557,0.709594395
|
||||||
|
ehi5_global_mean,aot_total,-0.012144664,0.788790373
|
||||||
|
ehi5_pers_MEAN,crt_correct,0.041879574,0.35541717
|
||||||
|
ehi5_pers_MEAN,crt_int,-0.038984568,0.38967884
|
||||||
|
ehi5_pers_MEAN,aot_total,-0.002320273,0.959183882
|
||||||
|
ehi5_pref_MEAN,aot_total,-0.009156402,0.839944313
|
||||||
|
ehi5_val_MEAN,crt_int,-0.060691086,0.180283214
|
||||||
|
ehi5_val_MEAN,aot_total,-0.043640227,0.335537257
|
||||||
|
ehi5_val_MEAN,crt_correct,0.038349744,0.397451186
|
||||||
|
ehiDGEN_10_mean,crt_int,-0.060692684,0.180271722
|
||||||
|
ehiDGEN_10_mean,crt_correct,0.048693261,0.282530257
|
||||||
|
ehiDGEN_10_mean,aot_total,-0.007003919,0.877226879
|
||||||
|
ehiDGEN_10_Pers,crt_int,-0.08321639,0.065963243
|
||||||
|
ehiDGEN_10_Pers,crt_correct,0.062811394,0.165507368
|
||||||
|
ehiDGEN_10_Pers,aot_total,-0.005213733,0.908446087
|
||||||
|
ehiDGEN_10_Pref,crt_correct,0.056920022,0.208938511
|
||||||
|
ehiDGEN_10_Pref,crt_int,-0.054154647,0.231949195
|
||||||
|
ehiDGEN_10_Pref,aot_total,0.012229383,0.787352177
|
||||||
|
ehiDGEN_10_Val,crt_correct,-0.046875479,0.300907385
|
||||||
|
ehiDGEN_10_Val,aot_total,-0.041722577,0.35722513
|
||||||
|
ehiDGEN_10_Val,crt_int,0.029931685,0.509033307
|
||||||
|
ehiDGEN_5_mean,aot_total,-0.068430961,0.130753688
|
||||||
|
ehiDGEN_5_mean,crt_correct,0.047639811,0.293085717
|
||||||
|
ehiDGEN_5_mean,crt_int,-0.042472828,0.348637399
|
||||||
|
ehiDGEN_5_Pers,crt_correct,0.061428736,0.175035746
|
||||||
|
ehiDGEN_5_Pers,crt_int,-0.038746148,0.392587023
|
||||||
|
ehiDGEN_5_Pref,crt_int,-0.042178047,0.351995885
|
||||||
|
ehiDGEN_5_Pref,aot_total,-0.038691604,0.393254174
|
||||||
|
ehiDGEN_5_Pref,crt_correct,0.033401162,0.461166903
|
||||||
|
ehiDGEN_5_Val,aot_total,-0.080834838,0.074119126
|
||||||
|
ehiDGEN_5_Val,crt_int,-0.024116805,0.594715088
|
||||||
|
ehiDGEN_5_Val,crt_correct,0.02187202,0.629462582
|
||||||
|
,,,
|
||||||
|
14
eohi2/correlation_matrix.csv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
,ehiDGEN_5_mean,ehiDGEN_10_mean,ehi5_global_mean,ehi10_global_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
|
||||||
|
ehiDGEN_10_mean,0.42***,,,,,,,,
|
||||||
|
ehi5_global_mean,0.21***,0.21***,,,,,,,
|
||||||
|
ehi10_global_mean,0.19***,0.37***,0.43***,,,,,,
|
||||||
|
sex_dummy,0.074,0.053,0.023,0.051,,,,,
|
||||||
|
demo_age_1,-0.053,-0.22***,-0.21***,-0.26***,-0.028,,,,
|
||||||
|
edu_num,-0.031,-0.037,0.083,-0.073,0.0094,-0.078,,,
|
||||||
|
aot_total,-0.064,-0.0067,-0.0032,0.050,-0.072,-0.20***,0.10*,,
|
||||||
|
crt_correct,0.045,0.053,0.093*,0.090*,-0.16***,-0.0044,0.12**,-0.037,
|
||||||
|
crt_int,-0.041,-0.065,-0.11*,-0.083,0.14*,0.028,-0.11*,-0.011,-0.88***
|
||||||
|
,,,,,,,,,
|
||||||
|
*p<0.05,,,,,,,,,
|
||||||
|
**p<0.01,,,,,,,,,
|
||||||
|
***p<0.001,,,,,,,,,
|
||||||
|
BIN
eohi2/correlation_plot_domain_general_vars_spearman.pdf
Normal file
BIN
eohi2/correlation_plot_domain_vars_pearson.pdf
Normal file
BIN
eohi2/correlation_plot_domain_vars_spearman.pdf
Normal file
BIN
eohi2/correlation_plot_scales_spearman.pdf
Normal file
10
eohi2/correlation_pvalues.csv
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
,ehiDGEN_5_mean,ehiDGEN_10_mean,ehi5_global_mean,ehi10_global_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
|
||||||
|
ehiDGEN_10_mean,2.9929E-22,,,,,,,,
|
||||||
|
ehi5_global_mean,4.14491E-06,3.02E-06,,,,,,,
|
||||||
|
ehi10_global_mean,1.61573E-05,1.66719E-17,1.63546E-23,,,,,,
|
||||||
|
sex_dummy,0.104170333,0.245085093,0.606827914,0.260991884,,,,,
|
||||||
|
demo_age_1,0.239362544,1.57395E-06,4.57184E-06,5.31327E-09,0.536013997,,,,
|
||||||
|
edu_num,0.489059792,0.41328311,0.067372333,0.10843647,0.835590798,0.086262901,,,
|
||||||
|
aot_total,0.156912095,0.882372703,0.943759329,0.269428383,0.112852544,5.87906E-06,0.025659604,,
|
||||||
|
crt_correct,0.319158511,0.24026811,0.039792024,0.047593264,0.000483116,0.923101803,0.00884239,0.4122211,
|
||||||
|
crt_int,0.365601388,0.150705909,0.01133869,0.065890723,0.001816488,0.533287266,0.012949289,0.810348767,9.5293E-160
|
||||||
|
34
eohi2/correlations - domain general vars.csv
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
Variable1,Variable2,Spearman_r,P_value
|
||||||
|
DGEN_10_global_mean,aot_total,0.393039545,0
|
||||||
|
DGEN_5_global_mean,aot_total,0.401732294,0
|
||||||
|
DGEN_fut_10_mean,aot_total,0.372705919,0
|
||||||
|
DGEN_fut_5_mean,aot_total,0.400202502,0
|
||||||
|
DGEN_past_10_mean,aot_total,0.368847521,0
|
||||||
|
DGEN_past_5_mean,aot_total,0.376820454,0
|
||||||
|
DGENfut_global_mean,aot_total,0.382481063,0
|
||||||
|
DGENpast_global_mean,aot_total,0.369258554,0
|
||||||
|
DGEN_fut_5_mean,crt_correct,-0.08823341,0.051182461
|
||||||
|
DGENfut_global_mean,crt_correct,-0.086607229,0.05563658
|
||||||
|
DGEN_5_global_mean,crt_correct,-0.079087177,0.080612427
|
||||||
|
DGEN_fut_10_mean,crt_correct,-0.073786402,0.103162718
|
||||||
|
DGEN_fut_5.10_mean,aot_total,0.06505662,0.150871574
|
||||||
|
DGEN_10_global_mean,crt_correct,-0.060769258,0.17972166
|
||||||
|
DGEN_5.10_global_mean,aot_total,0.058102985,0.199615824
|
||||||
|
DGEN_past_5_mean,crt_correct,-0.057080059,0.207659218
|
||||||
|
DGEN_fut_5.10_mean,crt_correct,-0.052644919,0.245242874
|
||||||
|
DGEN_past_5.10_mean,aot_total,0.050343754,0.266514845
|
||||||
|
DGEN_fut_5.10_mean,crt_int,0.049423543,0.275365524
|
||||||
|
DGENpast_global_mean,crt_correct,-0.048081298,0.288630365
|
||||||
|
DGEN_past_10_mean,crt_int,-0.042776663,0.345197086
|
||||||
|
DGEN_5.10_global_mean,crt_correct,-0.036910124,0.415418734
|
||||||
|
DGENpast_global_mean,crt_int,-0.035697633,0.430916171
|
||||||
|
DGEN_past_10_mean,crt_correct,-0.035562484,0.432664043
|
||||||
|
DGEN_past_5_mean,crt_int,-0.031740856,0.483754572
|
||||||
|
DGEN_10_global_mean,crt_int,-0.019205703,0.671817818
|
||||||
|
DGEN_past_5.10_mean,crt_int,-0.015826279,0.727015617
|
||||||
|
DGEN_5_global_mean,crt_int,-0.014881366,0.742720553
|
||||||
|
DGEN_past_5.10_mean,crt_correct,-0.013651301,0.763324994
|
||||||
|
DGEN_5.10_global_mean,crt_int,0.011869586,0.793465063
|
||||||
|
DGENfut_global_mean,crt_int,0.009764448,0.829473295
|
||||||
|
DGEN_fut_10_mean,crt_int,0.002368134,0.95834271
|
||||||
|
DGEN_fut_5_mean,crt_int,-0.0005323,0.990632398
|
||||||
|
175
eohi2/correlations - domain general vars.r
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(corrplot)
|
||||||
|
library(Hmisc)
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp2_data <- read.csv("eohi2.csv")
|
||||||
|
|
||||||
|
# Define the two sets of variables
|
||||||
|
set1_vars <- c("DGEN_past_5_mean", "DGEN_past_10_mean", "DGEN_fut_5_mean", "DGEN_fut_10_mean",
|
||||||
|
"DGEN_past_5.10_mean", "DGEN_fut_5.10_mean", "DGENpast_global_mean",
|
||||||
|
"DGENfut_global_mean", "DGEN_5_global_mean", "DGEN_10_global_mean", "DGEN_5.10_global_mean")
|
||||||
|
set2_vars <- c("aot_total", "crt_correct", "crt_int")
|
||||||
|
|
||||||
|
# Create subset with only the variables of interest
|
||||||
|
correlation_data <- exp2_data[, c(set1_vars, set2_vars)]
|
||||||
|
|
||||||
|
# ===== NORMALITY CHECKS =====
|
||||||
|
# Shapiro-Wilk tests for normality (n < 5000)
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
if(length(na.omit(correlation_data[[var]])) <= 5000) {
|
||||||
|
shapiro_result <- shapiro.test(correlation_data[[var]])
|
||||||
|
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
|
||||||
|
var, shapiro_result$p.value,
|
||||||
|
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Visual normality checks
|
||||||
|
pdf("normality_plots_domain_general_vars.pdf", width = 12, height = 8)
|
||||||
|
par(mfrow = c(2, 4))
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
# Histogram with normal curve overlay
|
||||||
|
hist(correlation_data[[var]], main = paste("Histogram:", var),
|
||||||
|
xlab = var, freq = FALSE)
|
||||||
|
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE)),
|
||||||
|
add = TRUE, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Q-Q plot
|
||||||
|
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
|
||||||
|
qqline(correlation_data[[var]], col = "red", lwd = 2)
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# ===== LINEARITY CHECKS =====
|
||||||
|
# Check linearity between variable pairs
|
||||||
|
pdf("linearity_plots_domain_general_vars.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(4, 3))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
# Scatter plot with regression line
|
||||||
|
plot(correlation_data[[var1]], correlation_data[[var2]],
|
||||||
|
main = paste(var1, "vs", var2),
|
||||||
|
xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black")
|
||||||
|
|
||||||
|
# Add linear regression line
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
abline(lm_fit, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add LOESS smooth line for non-linear pattern detection
|
||||||
|
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
|
||||||
|
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
|
||||||
|
loess_pred <- predict(loess_fit, x_seq)
|
||||||
|
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
|
||||||
|
|
||||||
|
# Calculate R-squared for linear fit
|
||||||
|
r_squared <- summary(lm_fit)$r.squared
|
||||||
|
cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Residual analysis for linearity
|
||||||
|
pdf("residual_plots_domain_general_vars.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(4, 3))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
residuals <- residuals(lm_fit)
|
||||||
|
fitted <- fitted(lm_fit)
|
||||||
|
|
||||||
|
plot(fitted, residuals,
|
||||||
|
main = paste("Residuals:", var1, "vs", var2),
|
||||||
|
xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black")
|
||||||
|
abline(h = 0, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add smooth line to residuals
|
||||||
|
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate Spearman correlation matrix only
|
||||||
|
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
|
||||||
|
|
||||||
|
# Print correlation matrix with 5 decimal places
|
||||||
|
print(round(cor_matrix_spearman, 5))
|
||||||
|
|
||||||
|
# Separate correlations between the two sets (Spearman)
|
||||||
|
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
|
||||||
|
print(round(set1_set2_cor, 5))
|
||||||
|
|
||||||
|
# Calculate correlations within each set (Spearman)
|
||||||
|
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
|
||||||
|
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
|
||||||
|
|
||||||
|
# Statistical significance tests (Spearman)
|
||||||
|
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
|
||||||
|
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create correlation plot for Spearman only
|
||||||
|
pdf("correlation_plot_domain_general_vars_spearman.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_spearman, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Spearman Correlation Matrix: Domain-General Vars vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Summary statistics
|
||||||
|
desc_stats <- describe(correlation_data)
|
||||||
|
print(round(desc_stats, 5))
|
||||||
|
|
||||||
|
# Save results to CSV files
|
||||||
|
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_general_vars.csv")
|
||||||
|
write.csv(round(desc_stats, 5), "descriptive_statistics_domain_general_vars.csv")
|
||||||
|
|
||||||
|
# Save correlation results in a formatted table
|
||||||
|
cor_results <- data.frame(
|
||||||
|
Variable1 = character(),
|
||||||
|
Variable2 = character(),
|
||||||
|
Spearman_r = numeric(),
|
||||||
|
P_value = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract correlations between sets
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
r_val <- cor_matrix_spearman[var1, var2]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
|
||||||
|
cor_results <- rbind(cor_results, data.frame(
|
||||||
|
Variable1 = var1,
|
||||||
|
Variable2 = var2,
|
||||||
|
Spearman_r = r_val,
|
||||||
|
P_value = p_val,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write.csv(cor_results, "correlations - domain general vars.csv", row.names = FALSE)
|
||||||
34
eohi2/correlations - domain specific vars.csv
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
Variable1,Variable2,Spearman_r,P_value
|
||||||
|
N10_global_mean,aot_total,0.150441363,0.000846088
|
||||||
|
N5_global_mean,aot_total,0.185910923,3.52323E-05
|
||||||
|
NFut_10_mean,aot_total,0.155140023,0.000575829
|
||||||
|
NFut_5_mean,aot_total,0.199536342,8.75208E-06
|
||||||
|
NFut_global_mean,aot_total,0.185343328,3.72583E-05
|
||||||
|
NPast_10_mean,aot_total,0.143227088,0.001495728
|
||||||
|
NPast_5_mean,aot_total,0.150454018,0.000845224
|
||||||
|
NPast_global_mean,aot_total,0.148341676,0.001001318
|
||||||
|
X5.10_global_mean,aot_total,0.206551087,4.11273E-06
|
||||||
|
X5.10fut_mean,aot_total,0.208127536,3.45821E-06
|
||||||
|
X5.10past_mean,aot_total,0.168898536,0.000175189
|
||||||
|
NFut_5_mean,crt_correct,-0.055312717,0.222103555
|
||||||
|
NPast_5_mean,crt_int,-0.053658543,0.236260102
|
||||||
|
NPast_global_mean,crt_int,-0.043976695,0.33182097
|
||||||
|
NPast_10_mean,crt_int,-0.043338736,0.33888983
|
||||||
|
NFut_global_mean,crt_correct,-0.036977473,0.414567649
|
||||||
|
X5.10fut_mean,crt_int,-0.030131358,0.506209892
|
||||||
|
NPast_5_mean,crt_correct,0.028880988,0.524024755
|
||||||
|
NFut_5_mean,crt_int,0.027093379,0.550039031
|
||||||
|
X5.10_global_mean,crt_int,-0.025449064,0.574514939
|
||||||
|
NPast_10_mean,crt_correct,0.021023332,0.642821387
|
||||||
|
NPast_global_mean,crt_correct,0.020816029,0.646102235
|
||||||
|
NFut_10_mean,crt_correct,-0.020486277,0.651335162
|
||||||
|
X5.10past_mean,crt_int,-0.018645297,0.680859581
|
||||||
|
N5_global_mean,crt_correct,-0.018075161,0.690105495
|
||||||
|
N10_global_mean,crt_int,-0.016544862,0.71514808
|
||||||
|
NFut_10_mean,crt_int,-0.009395086,0.835830518
|
||||||
|
NFut_global_mean,crt_int,0.008419817,0.852666611
|
||||||
|
N10_global_mean,crt_correct,-0.008133995,0.857613839
|
||||||
|
N5_global_mean,crt_int,-0.007330571,0.87154938
|
||||||
|
X5.10fut_mean,crt_correct,-0.005165193,0.909294754
|
||||||
|
X5.10_global_mean,crt_correct,-0.004538036,0.920269031
|
||||||
|
X5.10past_mean,crt_correct,0.003021813,0.94685912
|
||||||
|
197
eohi2/correlations - domain specific vars.r
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(corrplot)
|
||||||
|
library(Hmisc)
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp2_data <- read.csv("eohi2.csv")
|
||||||
|
|
||||||
|
# Define the two sets of variables
|
||||||
|
set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean",
|
||||||
|
"X5.10past_mean", "X5.10fut_mean", "NPast_global_mean",
|
||||||
|
"NFut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean")
|
||||||
|
set2_vars <- c("aot_total", "crt_correct", "crt_int")
|
||||||
|
|
||||||
|
# Create subset with only the variables of interest
|
||||||
|
correlation_data <- exp2_data[, c(set1_vars, set2_vars)]
|
||||||
|
|
||||||
|
# ===== NORMALITY CHECKS =====
|
||||||
|
# Shapiro-Wilk tests for normality (n < 5000)
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
if(length(na.omit(correlation_data[[var]])) <= 5000) {
|
||||||
|
shapiro_result <- shapiro.test(correlation_data[[var]])
|
||||||
|
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
|
||||||
|
var, shapiro_result$p.value,
|
||||||
|
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Kolmogorov-Smirnov test for normality
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
ks_result <- ks.test(correlation_data[[var]], "pnorm",
|
||||||
|
mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE))
|
||||||
|
cat(sprintf("%s: KS p = %.5f %s\n",
|
||||||
|
var, ks_result$p.value,
|
||||||
|
ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Visual normality checks
|
||||||
|
pdf("normality_plots_domain_vars.pdf", width = 12, height = 8)
|
||||||
|
par(mfrow = c(2, 4))
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
# Histogram with normal curve overlay
|
||||||
|
hist(correlation_data[[var]], main = paste("Histogram:", var),
|
||||||
|
xlab = var, freq = FALSE)
|
||||||
|
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE)),
|
||||||
|
add = TRUE, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Q-Q plot
|
||||||
|
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
|
||||||
|
qqline(correlation_data[[var]], col = "red", lwd = 2)
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# ===== LINEARITY CHECKS =====
|
||||||
|
# Check linearity between variable pairs
|
||||||
|
pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(4, 3))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
# Scatter plot with regression line
|
||||||
|
plot(correlation_data[[var1]], correlation_data[[var2]],
|
||||||
|
main = paste(var1, "vs", var2),
|
||||||
|
xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black")
|
||||||
|
|
||||||
|
# Add linear regression line
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
abline(lm_fit, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add LOESS smooth line for non-linear pattern detection
|
||||||
|
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
|
||||||
|
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
|
||||||
|
loess_pred <- predict(loess_fit, x_seq)
|
||||||
|
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
|
||||||
|
|
||||||
|
# Calculate R-squared for linear fit
|
||||||
|
r_squared <- summary(lm_fit)$r.squared
|
||||||
|
cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Residual analysis for linearity
|
||||||
|
pdf("residual_plots_domain_vars.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(4, 3))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
residuals <- residuals(lm_fit)
|
||||||
|
fitted <- fitted(lm_fit)
|
||||||
|
|
||||||
|
plot(fitted, residuals,
|
||||||
|
main = paste("Residuals:", var1, "vs", var2),
|
||||||
|
xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black")
|
||||||
|
abline(h = 0, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add smooth line to residuals
|
||||||
|
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate correlation matrices (both Pearson and Spearman)
|
||||||
|
cor_matrix_pearson <- cor(correlation_data, method = "pearson")
|
||||||
|
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
|
||||||
|
|
||||||
|
# Use Spearman as primary method
|
||||||
|
cor_matrix <- cor_matrix_spearman
|
||||||
|
|
||||||
|
# Print correlation matrices with 5 decimal places
|
||||||
|
print(round(cor_matrix_spearman, 5))
|
||||||
|
|
||||||
|
# Separate correlations between the two sets (Spearman)
|
||||||
|
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
|
||||||
|
print(round(set1_set2_cor, 5))
|
||||||
|
|
||||||
|
# Calculate correlations within each set (Spearman)
|
||||||
|
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
|
||||||
|
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
|
||||||
|
|
||||||
|
# Statistical significance tests (Spearman)
|
||||||
|
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
|
||||||
|
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create correlation plots for both methods
|
||||||
|
pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_spearman, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_pearson, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Summary statistics
|
||||||
|
desc_stats <- describe(correlation_data)
|
||||||
|
print(round(desc_stats, 5))
|
||||||
|
|
||||||
|
# Save results to CSV files
|
||||||
|
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv")
|
||||||
|
write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv")
|
||||||
|
write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv")
|
||||||
|
|
||||||
|
# Save correlation results in a formatted table
|
||||||
|
cor_results <- data.frame(
|
||||||
|
Variable1 = character(),
|
||||||
|
Variable2 = character(),
|
||||||
|
Spearman_r = numeric(),
|
||||||
|
P_value = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract significant correlations between sets
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
r_val <- cor_matrix_spearman[var1, var2]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
|
||||||
|
cor_results <- rbind(cor_results, data.frame(
|
||||||
|
Variable1 = var1,
|
||||||
|
Variable2 = var2,
|
||||||
|
Spearman_r = r_val,
|
||||||
|
P_value = p_val,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE)
|
||||||
|
|
||||||
176
eohi2/correlations CORRECT - ehi + DGEN x scales.r
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(corrplot)
|
||||||
|
library(Hmisc)
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp1_data <- read.csv("eohi2.csv")
|
||||||
|
|
||||||
|
# Define the two sets of variables
|
||||||
|
set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val",
|
||||||
|
"ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val",
|
||||||
|
"ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN",
|
||||||
|
"ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN",
|
||||||
|
"ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN",
|
||||||
|
"ehiDGEN_5_mean", "ehiDGEN_10_mean",
|
||||||
|
"ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean")
|
||||||
|
set2_vars <- c("aot_total", "crt_correct", "crt_int")
|
||||||
|
|
||||||
|
# Create subset with only the variables of interest
|
||||||
|
correlation_data <- exp1_data[, c(set1_vars, set2_vars)]
|
||||||
|
|
||||||
|
# ===== NORMALITY CHECKS =====
|
||||||
|
# Shapiro-Wilk tests for normality
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
shapiro_result <- shapiro.test(correlation_data[[var]])
|
||||||
|
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
|
||||||
|
var, shapiro_result$p.value,
|
||||||
|
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Visual normality checks
|
||||||
|
pdf("normality_plots.pdf", width = 12, height = 8)
|
||||||
|
par(mfrow = c(2, 4))
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
# Histogram with normal curve overlay
|
||||||
|
hist(correlation_data[[var]], main = paste("Histogram:", var),
|
||||||
|
xlab = var, freq = FALSE)
|
||||||
|
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE)),
|
||||||
|
add = TRUE, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Q-Q plot
|
||||||
|
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
|
||||||
|
qqline(correlation_data[[var]], col = "red", lwd = 2)
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# ===== LINEARITY CHECKS =====
|
||||||
|
# Check linearity between variable pairs
|
||||||
|
pdf("linearity_plots.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(3, 5))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
# Scatter plot with regression line
|
||||||
|
plot(correlation_data[[var1]], correlation_data[[var2]],
|
||||||
|
main = paste(var1, "vs", var2),
|
||||||
|
xlab = var1, ylab = var2, pch = 16, cex = 0.6)
|
||||||
|
|
||||||
|
# Add linear regression line
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
abline(lm_fit, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add LOESS smooth line for non-linear pattern detection
|
||||||
|
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
|
||||||
|
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
|
||||||
|
loess_pred <- predict(loess_fit, x_seq)
|
||||||
|
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
|
||||||
|
|
||||||
|
# Calculate R-squared for linear fit
|
||||||
|
r_squared <- summary(lm_fit)$r.squared
|
||||||
|
cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Residual analysis for linearity
|
||||||
|
pdf("residual_plots.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(3, 5))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
residuals <- residuals(lm_fit)
|
||||||
|
fitted <- fitted(lm_fit)
|
||||||
|
|
||||||
|
plot(fitted, residuals,
|
||||||
|
main = paste("Residuals:", var1, "vs", var2),
|
||||||
|
xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
|
||||||
|
abline(h = 0, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add smooth line to residuals
|
||||||
|
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate correlation matrix (Spearman only)
|
||||||
|
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
|
||||||
|
|
||||||
|
# Print correlation matrix with 5 decimal places
|
||||||
|
print(round(cor_matrix_spearman, 5))
|
||||||
|
|
||||||
|
# Separate correlations between the two sets (Spearman)
|
||||||
|
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
|
||||||
|
print(round(set1_set2_cor, 5))
|
||||||
|
|
||||||
|
# Calculate correlations within each set (Spearman)
|
||||||
|
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
|
||||||
|
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
|
||||||
|
|
||||||
|
# Statistical significance tests (Spearman)
|
||||||
|
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
|
||||||
|
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create correlation plot (Spearman only)
|
||||||
|
pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_spearman, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Summary statistics
|
||||||
|
desc_stats <- describe(correlation_data)
|
||||||
|
print(round(desc_stats, 5))
|
||||||
|
|
||||||
|
# Save results to CSV files
|
||||||
|
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv")
|
||||||
|
write.csv(round(desc_stats, 5), "descriptive_statistics.csv")
|
||||||
|
|
||||||
|
# Save correlation results in a formatted table
|
||||||
|
cor_results <- data.frame(
|
||||||
|
Variable1 = character(),
|
||||||
|
Variable2 = character(),
|
||||||
|
Spearman_r = numeric(),
|
||||||
|
P_value = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract significant correlations between sets
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
r_val <- cor_matrix_spearman[var1, var2]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
|
||||||
|
cor_results <- rbind(cor_results, data.frame(
|
||||||
|
Variable1 = var1,
|
||||||
|
Variable2 = var2,
|
||||||
|
Spearman_r = r_val,
|
||||||
|
P_value = p_val,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write.csv(cor_results, "correlationCORRECT_exp2.csv", row.names = FALSE)
|
||||||
266
eohi2/dataP 01 - recode and combine past & future vars.r
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
# Script to combine and recode Likert scale items in eohi2.csv
|
||||||
|
# Combines 01 and 02 versions of items, then recodes text to numeric values
|
||||||
|
|
||||||
|
# Load necessary library
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Read the data (with check.names=FALSE to preserve original column names)
|
||||||
|
# na.strings="" keeps empty cells as empty strings instead of converting to NA
|
||||||
|
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
|
||||||
|
|
||||||
|
# Define the mapping function
|
||||||
|
# Map Likert agreement text to a numeric score in [-3, 3].
#
# x: character vector of responses (any case; surrounding whitespace is
#    ignored, which also handles padded CSV cells).
# Returns a numeric vector; unrecognized, empty, or NA responses become NA.
#
# Implemented as a base-R named-vector lookup: unmatched keys (including NA)
# yield NA_real_, reproducing the original case_when fallback while
# evaluating tolower() once and dropping the dplyr dependency.
recode_likert <- function(x) {
  likert_map <- c(
    "strongly disagree"          = -3,
    "disagree"                   = -2,
    "somewhat disagree"          = -1,
    "neither agree nor disagree" =  0,
    "somewhat agree"             =  1,
    "agree"                      =  2,
    "strongly agree"             =  3
  )
  unname(likert_map[tolower(trimws(x))])
}
|
||||||
|
|
||||||
|
# Define source column pairs (Set A and Set B)
|
||||||
|
source_cols_A <- c(
|
||||||
|
"01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5",
|
||||||
|
"01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5",
|
||||||
|
"01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5",
|
||||||
|
"01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5",
|
||||||
|
"01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5",
|
||||||
|
"01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5",
|
||||||
|
"01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5",
|
||||||
|
"01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5",
|
||||||
|
"01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5",
|
||||||
|
"01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5",
|
||||||
|
"01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5",
|
||||||
|
"01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5"
|
||||||
|
)
|
||||||
|
|
||||||
|
source_cols_B <- c(
|
||||||
|
"02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5",
|
||||||
|
"02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5",
|
||||||
|
"02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5",
|
||||||
|
"02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5",
|
||||||
|
"02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5",
|
||||||
|
"02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5",
|
||||||
|
"02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5",
|
||||||
|
"02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5",
|
||||||
|
"02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5",
|
||||||
|
"02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5",
|
||||||
|
"02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5",
|
||||||
|
"02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define target column names
|
||||||
|
target_cols <- c(
|
||||||
|
"past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel",
|
||||||
|
"past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex",
|
||||||
|
"past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice",
|
||||||
|
"past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel",
|
||||||
|
"past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex",
|
||||||
|
"past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice",
|
||||||
|
"fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel",
|
||||||
|
"fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex",
|
||||||
|
"fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice",
|
||||||
|
"fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel",
|
||||||
|
"fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex",
|
||||||
|
"fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
|
||||||
|
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Get actual column names from dataframe (trimmed)
|
||||||
|
df_cols <- trimws(names(df))
|
||||||
|
|
||||||
|
# Print first 30 actual column names for debugging
|
||||||
|
cat("First 30 actual column names in CSV:\n")
|
||||||
|
for (i in 1:min(30, length(df_cols))) {
|
||||||
|
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
# Check Source A columns
|
||||||
|
missing_A <- source_cols_A[!source_cols_A %in% df_cols]
|
||||||
|
existing_A <- source_cols_A[source_cols_A %in% df_cols]
|
||||||
|
|
||||||
|
cat("Source Set A:\n")
|
||||||
|
cat(" Expected: 60 columns\n")
|
||||||
|
cat(" Found:", length(existing_A), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_A), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_A) > 0) {
|
||||||
|
cat("\n Missing columns from Set A:\n")
|
||||||
|
for (col in missing_A) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check Source B columns
|
||||||
|
missing_B <- source_cols_B[!source_cols_B %in% df_cols]
|
||||||
|
existing_B <- source_cols_B[source_cols_B %in% df_cols]
|
||||||
|
|
||||||
|
cat("\nSource Set B:\n")
|
||||||
|
cat(" Expected: 60 columns\n")
|
||||||
|
cat(" Found:", length(existing_B), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_B), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_B) > 0) {
|
||||||
|
cat("\n Missing columns from Set B:\n")
|
||||||
|
for (col in missing_B) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check for columns with similar names (potential typos/spaces)
|
||||||
|
if (length(missing_A) > 0 || length(missing_B) > 0) {
|
||||||
|
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
|
||||||
|
all_missing <- c(missing_A, missing_B)
|
||||||
|
for (miss_col in all_missing) {
|
||||||
|
# Find columns that start with similar pattern
|
||||||
|
pattern <- substr(miss_col, 1, 10)
|
||||||
|
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
|
||||||
|
if (length(similar) > 0) {
|
||||||
|
cat("\nLooking for:", miss_col)
|
||||||
|
cat("\n Similar columns found:\n")
|
||||||
|
for (sim in similar) {
|
||||||
|
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Stop if critical columns are missing
|
||||||
|
if (length(missing_A) > 30 || length(missing_B) > 30) {
|
||||||
|
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# Process each pair of columns
|
||||||
|
# For each item pair: coalesce Set A over Set B, then recode the Likert
# text to a numeric score in the target column.
# seq_along(target_cols) replaces the hard-coded 1:60 so the loop stays
# correct if the column lists ever change length.
for (i in seq_along(target_cols)) {
  col_A <- source_cols_A[i]
  col_B <- source_cols_B[i]
  target_col <- target_cols[i]

  # Get values from columns; a missing source column contributes all-NA
  vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df))
  vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df))

  # Coalesce: take value from vals_A if present (non-NA, non-empty),
  # otherwise fall back to vals_B
  combined <- ifelse(!is.na(vals_A) & vals_A != "", vals_A, vals_B)

  # Recode text to numeric and write the target column
  df[[target_col]] <- recode_likert(combined)

  # Print progress
  cat("Processed:", target_col, "\n")
}
|
||||||
|
|
||||||
|
# ============= VERIFY TARGET COLUMNS WERE CREATED =============
|
||||||
|
cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
|
||||||
|
|
||||||
|
# Get updated column names
|
||||||
|
df_cols_after <- trimws(names(df))
|
||||||
|
|
||||||
|
# Check which target columns exist
|
||||||
|
existing_targets <- target_cols[target_cols %in% df_cols_after]
|
||||||
|
missing_targets <- target_cols[!target_cols %in% df_cols_after]
|
||||||
|
|
||||||
|
cat("Target Columns:\n")
|
||||||
|
cat(" Expected: 60 columns\n")
|
||||||
|
cat(" Created:", length(existing_targets), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_targets), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_targets) > 0) {
|
||||||
|
cat("\n WARNING: The following target columns were NOT created:\n")
|
||||||
|
for (col in missing_targets) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
stop("\nERROR: Not all target columns were created successfully!")
|
||||||
|
} else {
|
||||||
|
cat("\n SUCCESS: All 60 target columns created successfully!\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END VERIFICATION ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: for one randomly sampled respondent, print every source
# pair (Set A / Set B), which set supplied the value, the raw Likert text,
# and the numeric code written to the target column. Reads `df`,
# `source_cols_A`, `source_cols_B`, and `target_cols` from the script
# environment; purely diagnostic — modifies nothing. Call again to sample
# another row.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # Check each of the 60 pairs
  for (i in 1:60) {
    col_A <- source_cols_A[i]
    col_B <- source_cols_B[i]
    target_col <- target_cols[i]

    # Get values; a missing source column is treated as an empty response
    val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
    val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
    target_val <- df[random_row, target_col]

    # Determine which source had the value — Set A wins, mirroring the
    # coalescing rule used when the target columns were built
    has_val_A <- !is.na(val_A) && val_A != ""
    has_val_B <- !is.na(val_B) && val_B != ""

    if (has_val_A) {
      source_used <- "A"
      original_text <- val_A
    } else if (has_val_B) {
      source_used <- "B"
      original_text <- val_B
    } else {
      source_used <- "NONE"
      original_text <- "(empty)"
    }

    # Print the info
    cat(sprintf("Pair %2d:\n", i))
    cat(sprintf(" Source A: %-30s\n", col_A))
    cat(sprintf(" Source B: %-30s\n", col_B))
    cat(sprintf(" Target: %-30s\n", target_col))
    cat(sprintf(" Value found in: Source %s\n", source_used))
    cat(sprintf(" Original text: '%s'\n", original_text))
    cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on first random row
|
||||||
|
cat("\n\n")
|
||||||
|
qa_check_random_row()
|
||||||
|
|
||||||
|
# Instructions for running additional checks
|
||||||
|
cat("\n")
|
||||||
|
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
|
||||||
|
cat("Run this command in R console:\n")
|
||||||
|
cat(" qa_check_random_row()\n")
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
|
||||||
|
# Save the modified dataframe back to CSV
|
||||||
|
# na="" writes NA values as empty cells instead of "NA" text
|
||||||
|
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
|
||||||
|
|
||||||
|
cat("\nProcessing complete! 60 new columns added to eohi2.csv\n")
|
||||||
|
|
||||||
192
eohi2/dataP 02 - recode present VARS.r
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
# Script to recode present-time Likert scale items in eohi2.csv
|
||||||
|
# Recodes prePrefItem, prePersItem, and preValItem to numeric values
|
||||||
|
|
||||||
|
# Load necessary library
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Read the data (with check.names=FALSE to preserve original column names)
|
||||||
|
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
|
||||||
|
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
|
||||||
|
|
||||||
|
# Define the mapping function
|
||||||
|
# Map Likert agreement text to a numeric score in [-3, 3].
#
# x: character vector of responses (any case; surrounding whitespace is
#    ignored, which also handles padded CSV cells).
# Returns a numeric vector; unrecognized, empty, or NA responses become NA.
#
# Implemented as a base-R named-vector lookup: unmatched keys (including NA)
# yield NA_real_, reproducing the original case_when fallback while
# evaluating tolower() once and dropping the dplyr dependency.
recode_likert <- function(x) {
  likert_map <- c(
    "strongly disagree"          = -3,
    "disagree"                   = -2,
    "somewhat disagree"          = -1,
    "neither agree nor disagree" =  0,
    "somewhat agree"             =  1,
    "agree"                      =  2,
    "strongly agree"             =  3
  )
  unname(likert_map[tolower(trimws(x))])
}
|
||||||
|
|
||||||
|
# Define source columns (15 columns total)
|
||||||
|
source_cols <- c(
|
||||||
|
"prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5",
|
||||||
|
"prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5",
|
||||||
|
"preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define target column names (15 columns total)
|
||||||
|
target_cols <- c(
|
||||||
|
"present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel",
|
||||||
|
"present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex",
|
||||||
|
"present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
|
||||||
|
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Get actual column names from dataframe (trimmed)
|
||||||
|
df_cols <- trimws(names(df))
|
||||||
|
|
||||||
|
# Print first 30 actual column names for debugging
|
||||||
|
cat("First 30 actual column names in CSV:\n")
|
||||||
|
for (i in 1:min(30, length(df_cols))) {
|
||||||
|
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
# Check Source columns
|
||||||
|
missing_source <- source_cols[!source_cols %in% df_cols]
|
||||||
|
existing_source <- source_cols[source_cols %in% df_cols]
|
||||||
|
|
||||||
|
cat("Source Columns:\n")
|
||||||
|
cat(" Expected: 15 columns\n")
|
||||||
|
cat(" Found:", length(existing_source), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_source), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_source) > 0) {
|
||||||
|
cat("\n Missing columns:\n")
|
||||||
|
for (col in missing_source) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check for columns with similar names (potential typos/spaces)
|
||||||
|
if (length(missing_source) > 0) {
|
||||||
|
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
|
||||||
|
for (miss_col in missing_source) {
|
||||||
|
# Find columns that start with similar pattern
|
||||||
|
pattern <- substr(miss_col, 1, 10)
|
||||||
|
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
|
||||||
|
if (length(similar) > 0) {
|
||||||
|
cat("\nLooking for:", miss_col)
|
||||||
|
cat("\n Similar columns found:\n")
|
||||||
|
for (sim in similar) {
|
||||||
|
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Stop if critical columns are missing
|
||||||
|
if (length(missing_source) > 7) {
|
||||||
|
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# Check if target columns exist in the dataframe
|
||||||
|
cat("\n=== CHECKING TARGET COLUMNS ===\n")
|
||||||
|
existing_targets <- target_cols[target_cols %in% df_cols]
|
||||||
|
missing_targets <- target_cols[!target_cols %in% df_cols]
|
||||||
|
|
||||||
|
cat("Target Columns:\n")
|
||||||
|
cat(" Expected: 15 columns\n")
|
||||||
|
cat(" Found:", length(existing_targets), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_targets), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_targets) > 0) {
|
||||||
|
cat("\n Target columns do NOT exist yet - will create them.\n")
|
||||||
|
if (length(existing_targets) > 0) {
|
||||||
|
cat(" WARNING: Some target columns already exist and will be overwritten.\n")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cat(" All target columns exist - will overwrite with recoded values.\n")
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
# Process each column (overwrite existing target columns with recoded values)
|
||||||
|
# Recode each present-time source column into its numeric target column
# (existing target columns are overwritten).
# seq_along(target_cols) replaces the hard-coded 1:15 so the loop stays
# correct if the column lists ever change length.
for (i in seq_along(target_cols)) {
  source_col <- source_cols[i]
  target_col <- target_cols[i]

  # Get values from source column; a missing column contributes all-NA
  source_vals <- if (source_col %in% names(df)) df[[source_col]] else rep(NA, nrow(df))

  # Recode to numeric and overwrite/create the target column
  df[[target_col]] <- recode_likert(source_vals)

  # Print progress
  cat("Processed:", target_col, "\n")
}
|
||||||
|
|
||||||
|
cat("\n=== RECODING COMPLETE ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: for one randomly sampled respondent, print each source
# column's raw Likert text alongside the numeric code written to its target
# column. Reads `df`, `source_cols`, and `target_cols` from the script
# environment; purely diagnostic — modifies nothing. Call again to sample
# another row.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # Check each of the 15 columns
  for (i in 1:15) {
    source_col <- source_cols[i]
    target_col <- target_cols[i]

    # Get values; a missing source column is treated as an empty response
    source_val <- if (source_col %in% names(df)) df[random_row, source_col] else ""
    target_val <- df[random_row, target_col]

    # Determine if source has a value (non-NA and non-empty)
    has_val <- !is.na(source_val) && source_val != ""

    original_text <- if (has_val) source_val else "(empty)"

    # Print the info
    cat(sprintf("Column %2d:\n", i))
    cat(sprintf(" Source: %-30s\n", source_col))
    cat(sprintf(" Target: %-30s\n", target_col))
    cat(sprintf(" Original text: '%s'\n", original_text))
    cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on first random row
|
||||||
|
cat("\n\n")
|
||||||
|
qa_check_random_row()
|
||||||
|
|
||||||
|
# Instructions for running additional checks
|
||||||
|
cat("\n")
|
||||||
|
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
|
||||||
|
cat("Run this command in R console:\n")
|
||||||
|
cat(" qa_check_random_row()\n")
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
|
||||||
|
# Save the modified dataframe back to CSV.
# na="" writes NA values as empty cells instead of "NA" text.
# NOTE(review): the previous messages claimed the write was commented out
# ("uncomment line 189 to save"), but write.csv was active — the messages
# contradicted the actual behavior. Comment out the next line if you want a
# dry run.
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\nProcessing complete! 15 recoded columns saved to eohi2.csv\n")
|
||||||
253
eohi2/dataP 03 - recode DGEN vars.r
Normal file
@ -0,0 +1,253 @@
|
|||||||
|
# Script to combine DGEN variables in eohi2.csv
|
||||||
|
# Combines 01 and 02 versions of DGEN items (no recoding, just copying values)
|
||||||
|
|
||||||
|
# Load necessary library
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Read the data (with check.names=FALSE to preserve original column names)
|
||||||
|
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
|
||||||
|
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
|
||||||
|
|
||||||
|
# Define source column pairs (Set A and Set B)
|
||||||
|
# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header
|
||||||
|
source_cols_A <- c(
|
||||||
|
"01past5PrefDGEN_1",
|
||||||
|
"01past5PersDGEN_1",
|
||||||
|
"01past5ValDGEN_1",
|
||||||
|
"01past10PrefDGEN_1",
|
||||||
|
"01past10PersDGEN_1",
|
||||||
|
"01past10ValDGEN_1",
|
||||||
|
"01fut5PrefDGEN_8",
|
||||||
|
"01fut5PersDGEN_8",
|
||||||
|
"01fut5ValuesDGEN_1",
|
||||||
|
"01fut10PrefDGEN_8",
|
||||||
|
"01fut10PersDGEN_8",
|
||||||
|
"01fut10ValuesDGEN_1"
|
||||||
|
)
|
||||||
|
|
||||||
|
source_cols_B <- c(
|
||||||
|
"02past5PrefDGEN_1",
|
||||||
|
"02past5PersDGEN_1",
|
||||||
|
"02past5ValDGEN_1",
|
||||||
|
"02past10PrefDGEN_1",
|
||||||
|
"02past10PersDGEN_1",
|
||||||
|
"02past10ValDGEN_1",
|
||||||
|
"02fut5PrefDGEN_8",
|
||||||
|
"02fut5PersDGEN_8",
|
||||||
|
"02fut5ValDGEN_1",
|
||||||
|
"02fut10PrefDGEN_8",
|
||||||
|
"02fut10PersDGEN_8",
|
||||||
|
"02fut10ValDGEN_1"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define target column names
|
||||||
|
target_cols <- c(
|
||||||
|
"DGEN_past_5_Pref",
|
||||||
|
"DGEN_past_5_Pers",
|
||||||
|
"DGEN_past_5_Val",
|
||||||
|
"DGEN_past_10_Pref",
|
||||||
|
"DGEN_past_10_Pers",
|
||||||
|
"DGEN_past_10_Val",
|
||||||
|
"DGEN_fut_5_Pref",
|
||||||
|
"DGEN_fut_5_Pers",
|
||||||
|
"DGEN_fut_5_Val",
|
||||||
|
"DGEN_fut_10_Pref",
|
||||||
|
"DGEN_fut_10_Pers",
|
||||||
|
"DGEN_fut_10_Val"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
|
||||||
|
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Get actual column names from dataframe (trimmed)
|
||||||
|
df_cols <- trimws(names(df))
|
||||||
|
|
||||||
|
# Print first 30 actual column names for debugging
|
||||||
|
cat("First 30 actual column names in CSV:\n")
|
||||||
|
for (i in 1:min(30, length(df_cols))) {
|
||||||
|
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
# Check Source A columns
|
||||||
|
missing_A <- source_cols_A[!source_cols_A %in% df_cols]
|
||||||
|
existing_A <- source_cols_A[source_cols_A %in% df_cols]
|
||||||
|
|
||||||
|
cat("Source Set A:\n")
|
||||||
|
cat(" Expected: 12 columns\n")
|
||||||
|
cat(" Found:", length(existing_A), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_A), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_A) > 0) {
|
||||||
|
cat("\n Missing columns from Set A:\n")
|
||||||
|
for (col in missing_A) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check Source B columns
|
||||||
|
missing_B <- source_cols_B[!source_cols_B %in% df_cols]
|
||||||
|
existing_B <- source_cols_B[source_cols_B %in% df_cols]
|
||||||
|
|
||||||
|
cat("\nSource Set B:\n")
|
||||||
|
cat(" Expected: 12 columns\n")
|
||||||
|
cat(" Found:", length(existing_B), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_B), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_B) > 0) {
|
||||||
|
cat("\n Missing columns from Set B:\n")
|
||||||
|
for (col in missing_B) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check for columns with similar names (potential typos/spaces)
|
||||||
|
if (length(missing_A) > 0 || length(missing_B) > 0) {
|
||||||
|
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
|
||||||
|
all_missing <- c(missing_A, missing_B)
|
||||||
|
for (miss_col in all_missing) {
|
||||||
|
# Find columns that start with similar pattern
|
||||||
|
pattern <- substr(miss_col, 1, 10)
|
||||||
|
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
|
||||||
|
if (length(similar) > 0) {
|
||||||
|
cat("\nLooking for:", miss_col)
|
||||||
|
cat("\n Similar columns found:\n")
|
||||||
|
for (sim in similar) {
|
||||||
|
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Stop if critical columns are missing
|
||||||
|
if (length(missing_A) > 6 || length(missing_B) > 6) {
|
||||||
|
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# Process each pair of columns (just copy values, no recoding)
|
||||||
|
# For each DGEN pair: coalesce Set A over Set B and copy the value directly
# to the target column (no recoding).
# seq_along(target_cols) replaces the hard-coded 1:12 so the loop stays
# correct if the column lists ever change length.
for (i in seq_along(target_cols)) {
  col_A <- source_cols_A[i]
  col_B <- source_cols_B[i]
  target_col <- target_cols[i]

  # Get values from columns; a missing source column contributes all-NA
  vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df))
  vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df))

  # Coalesce: take value from vals_A if present (non-NA, non-empty),
  # otherwise fall back to vals_B — no recoding, just copy
  combined <- ifelse(!is.na(vals_A) & vals_A != "", vals_A, vals_B)

  # Copy directly to target column
  df[[target_col]] <- combined

  # Print progress
  cat("Processed:", target_col, "\n")
}
|
||||||
|
|
||||||
|
# ============= VERIFY TARGET COLUMNS WERE CREATED =============
|
||||||
|
cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
|
||||||
|
|
||||||
|
# Get updated column names
|
||||||
|
df_cols_after <- trimws(names(df))
|
||||||
|
|
||||||
|
# Check which target columns exist
|
||||||
|
existing_targets <- target_cols[target_cols %in% df_cols_after]
|
||||||
|
missing_targets <- target_cols[!target_cols %in% df_cols_after]
|
||||||
|
|
||||||
|
cat("Target Columns:\n")
|
||||||
|
cat(" Expected: 12 columns\n")
|
||||||
|
cat(" Created:", length(existing_targets), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_targets), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_targets) > 0) {
|
||||||
|
cat("\n WARNING: The following target columns were NOT created:\n")
|
||||||
|
for (col in missing_targets) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
stop("\nERROR: Not all target columns were created successfully!")
|
||||||
|
} else {
|
||||||
|
cat("\n SUCCESS: All 12 target columns created successfully!\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END VERIFICATION ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: for one randomly sampled respondent, print every DGEN
# source pair (Set A / Set B), which set supplied the value, the original
# value, and what was copied into the target column. Reads `df`,
# `source_cols_A`, `source_cols_B`, and `target_cols` from the script
# environment; purely diagnostic — modifies nothing. Call again to sample
# another row.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # Check each of the 12 pairs
  for (i in 1:12) {
    col_A <- source_cols_A[i]
    col_B <- source_cols_B[i]
    target_col <- target_cols[i]

    # Get values; a missing source column is treated as an empty response
    val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
    val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
    target_val <- df[random_row, target_col]

    # Determine which source had the value — Set A wins, mirroring the
    # coalescing rule used when the target columns were built
    has_val_A <- !is.na(val_A) && val_A != ""
    has_val_B <- !is.na(val_B) && val_B != ""

    if (has_val_A) {
      source_used <- "A"
      original_value <- val_A
    } else if (has_val_B) {
      source_used <- "B"
      original_value <- val_B
    } else {
      source_used <- "NONE"
      original_value <- "(empty)"
    }

    # Print the info
    cat(sprintf("Pair %2d:\n", i))
    cat(sprintf(" Source A: %-30s\n", col_A))
    cat(sprintf(" Source B: %-30s\n", col_B))
    cat(sprintf(" Target: %-30s\n", target_col))
    cat(sprintf(" Value found in: Source %s\n", source_used))
    cat(sprintf(" Original value: '%s'\n", original_value))
    cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on first random row
|
||||||
|
cat("\n\n")
|
||||||
|
qa_check_random_row()
|
||||||
|
|
||||||
|
# Instructions for running additional checks
|
||||||
|
cat("\n")
|
||||||
|
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
|
||||||
|
cat("Run this command in R console:\n")
|
||||||
|
cat(" qa_check_random_row()\n")
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
|
||||||
|
# Save the modified dataframe back to CSV
|
||||||
|
# na="" writes NA values as empty cells instead of "NA" text
|
||||||
|
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
|
||||||
|
|
||||||
|
cat("\nProcessing complete! 12 new columns added to eohi2.csv\n")
|
||||||
183
eohi2/dataP 04 - DGEN means.r
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
# Script to calculate DGEN means by time period in eohi2.csv
# Averages the 3 domain scores (Pref, Pers, Val) for each time period

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data. check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of NA.
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# The four time periods and the three domains measured in each
periods <- c("past_5", "past_10", "fut_5", "fut_10")
domains <- c("Pref", "Pers", "Val")

# Source columns (12 total): one column per period x domain,
# ordered period-by-period (Pref, Pers, Val within each period)
source_cols <- unlist(lapply(periods, function(p) paste0("DGEN_", p, "_", domains)))

# Target columns (4 total): one mean column per period
target_cols <- paste0("DGEN_", periods, "_mean")

# Groupings: each target mean is computed from its period's 3 domain columns
source_groups <- setNames(
  lapply(periods, function(p) paste0("DGEN_", p, "_", domains)),
  target_cols
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Actual (trimmed) column names present in the data
df_cols <- trimws(names(df))

# Partition the expected source columns into missing / found
missing_source <- setdiff(source_cols, df_cols)
existing_source <- intersect(source_cols, df_cols)

cat("Source Columns:\n")
cat("  Expected: 12 columns\n")
cat("  Found:", length(existing_source), "columns\n")
cat("  Missing:", length(missing_source), "columns\n")

if (length(missing_source) > 0) {
  cat("\n  Missing source columns:\n")
  cat(paste("    -", missing_source, "\n"), sep = "")
}

# Same partition for the target (mean) columns
missing_targets <- setdiff(target_cols, df_cols)
existing_targets <- intersect(target_cols, df_cols)

cat("\nTarget Columns:\n")
cat("  Expected: 4 columns\n")
cat("  Found:", length(existing_targets), "columns\n")
cat("  Missing:", length(missing_targets), "columns\n")

if (length(missing_targets) > 0) {
  cat("\n  Target columns do NOT exist yet - will create them.\n")
  if (length(existing_targets) > 0) {
    cat("  WARNING: Some target columns already exist and will be overwritten.\n")
  }
} else {
  cat("  All target columns exist - will overwrite with calculated values.\n")
}

cat("\n=== END CHECK ===\n\n")

# Abort when more than half of the 12 expected source columns are absent
if (length(missing_source) > 6) {
  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# ============= CALCULATE MEANS =============
cat("Calculating DGEN means by time period...\n")

# Coerce every source column that actually exists to numeric
num_cols <- intersect(source_cols, names(df))
df[num_cols] <- lapply(df[num_cols], as.numeric)

# Each target column is the row-wise mean of its period's 3 domain columns
for (target in target_cols) {
  grp <- intersect(source_groups[[target]], names(df))
  if (length(grp) == 0) {
    cat("  WARNING: No source columns found for", target, "\n")
    next
  }
  # na.rm = TRUE: a row's mean uses whichever domain scores are present
  df[[target]] <- rowMeans(df[, grp, drop = FALSE], na.rm = TRUE)
  cat("  Processed:", target, "\n")
}

cat("\n=== CALCULATION COMPLETE ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: picks one random row of `df` and, for each of the 4 target
# mean columns, prints the 3 source values, recomputes the mean by hand, and
# reports whether it matches the stored target value (tolerance 0.0001).
# Reads globals: df, target_cols, source_groups. No return value; prints only.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # Check each of the 4 target columns
  for (target in target_cols) {
    source_group <- source_groups[[target]]

    cat(sprintf("Target: %s\n", target))
    cat("  Source columns:\n")

    # Get values from source columns (NA when a column is absent)
    values <- numeric(3)
    for (i in 1:3) {
      col <- source_group[i]
      val <- if (col %in% names(df)) df[random_row, col] else NA
      values[i] <- val
      cat(sprintf("    %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
    }

    # Recompute the mean over the non-missing values, mirroring
    # rowMeans(..., na.rm = TRUE) used in the calculation step
    valid_values <- values[!is.na(values)]
    if (length(valid_values) > 0) {
      expected_mean <- mean(valid_values)
      actual_value <- df[random_row, target]

      cat(sprintf("\n  Calculation:\n"))
      cat(sprintf("    Sum: %s = %.5f\n", paste(valid_values, collapse = " + "), sum(valid_values)))
      cat(sprintf("    Average of %d values: %.5f\n", length(valid_values), expected_mean))
      cat(sprintf("    Target value: %.5f\n", actual_value))
      # Float-safe comparison instead of exact equality
      cat(sprintf("    Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
    } else {
      # All 3 sources missing; the stored target would be NaN from rowMeans
      cat("  No valid values to calculate mean.\n")
    }
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run the QA spot-check once on a randomly chosen row
cat("\n\n")
qa_check_random_row()

# Tell the user how to repeat the check interactively
cat("\n*** TO CHECK ANOTHER RANDOM ROW ***\nRun this command in R console:\n  qa_check_random_row()\n\n")

# Save the modified dataframe back to CSV.
# na = "" writes NA values as empty cells instead of the text "NA".
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\nReview the output above, then uncomment line 163 to save changes.\n\nProcessing complete! 4 DGEN mean columns calculated (not yet saved to file).\n")
|
||||||
298
eohi2/dataP 05 - recode scales VARS.r
Normal file
@ -0,0 +1,298 @@
|
|||||||
|
# Script to compute AOT and CRT scales in eohi2.csv
# AOT: Reverse codes items 4-7, then averages all 8 items
# CRT: Calculates proportion of correct and intuitive responses

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data. check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of NA.
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# Source columns: 8 AOT items and 3 CRT items
aot_cols <- paste0("aot_", 1:8)
crt_cols <- paste0("crt_", 1:3)

# Columns this script creates
target_cols <- c("aot_total", "crt_correct", "crt_int")

# Answer keys for the 3 CRT items (compared case-insensitively later)
crt_correct_answers <- c("5 cents", "5 minutes", "47 days")
crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days")
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Actual (trimmed) column names present in the data
df_cols <- trimws(names(df))

# Print one report section for a group of expected columns and return
# (invisibly) the subset that is absent from the data.
#   header        - section heading, including its own leading/trailing newlines
#   wanted        - character vector of expected column names
#   missing_label - heading shown above the list of absent columns
check_section <- function(header, wanted, missing_label) {
  missing <- wanted[!wanted %in% df_cols]
  existing <- wanted[wanted %in% df_cols]

  cat(header)
  cat("  Expected:", length(wanted), "columns\n")
  cat("  Found:", length(existing), "columns\n")
  cat("  Missing:", length(missing), "columns\n")

  if (length(missing) > 0) {
    cat(paste0("\n  ", missing_label, "\n"))
    cat(paste("    -", missing, "\n"), sep = "")
  }
  invisible(missing)
}

missing_aot <- check_section("AOT Source Columns:\n", aot_cols, "Missing AOT columns:")
missing_crt <- check_section("\nCRT Source Columns:\n", crt_cols, "Missing CRT columns:")
missing_targets <- check_section("\nTarget Columns:\n", target_cols, "Missing target columns:")

cat("\n=== END CHECK ===\n\n")

# Abort when too many columns are absent to produce meaningful scales
if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) {
  stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# ============= PROCESS AOT SCALE =============
cat("Processing AOT scale...\n")

# Convert AOT columns to numeric (coercion warnings flag non-numeric entries)
for (col in aot_cols) {
  if (col %in% names(df)) {
    df[[col]] <- as.numeric(df[[col]])
  }
}

# Calculate the average with reverse coding, WITHOUT modifying original values.
# Items aot_4..aot_7 are reverse coded (negated) for the calculation only.
#
# BUG FIX: the previous version negated POSITIONS 4:7 of the subset of AOT
# columns that happen to exist, so a missing early column (e.g. aot_2) shifted
# the reversal onto the wrong items. Reverse by column NAME instead, which is
# correct regardless of which columns are present.
present_aot <- aot_cols[aot_cols %in% names(df)]
reverse_items <- c("aot_4", "aot_5", "aot_6", "aot_7")
flip <- present_aot %in% reverse_items

df$aot_total <- apply(df[, present_aot, drop = FALSE], 1, function(row) {
  values <- as.numeric(row)
  # Negate only the reverse-coded items (assumes the scale is centered at 0,
  # matching the original script's negation approach)
  values[flip] <- -values[flip]
  # na.rm = TRUE: the mean uses whichever items the respondent answered
  mean(values, na.rm = TRUE)
})

cat("  AOT total scores calculated (items 4-7 reverse coded for calculation only).\n")
cat("  Original AOT item values preserved in dataframe.\n\n")
|
||||||
|
|
||||||
|
# ============= PROCESS CRT SCALES =============
cat("Processing CRT scales...\n")

# Initialize CRT columns
df$crt_correct <- NA
df$crt_int <- NA

# For each respondent, count answered items once and score both keys in the
# same pass (the correct and intuitive denominators are identical by
# construction: both count non-empty responses).
for (i in 1:nrow(df)) {
  n_answered <- 0
  n_correct <- 0
  n_intuitive <- 0

  for (j in 1:3) {
    col <- crt_cols[j]
    if (!(col %in% names(df))) next

    response <- trimws(tolower(as.character(df[i, col])))
    if (is.na(response) || response == "") next

    n_answered <- n_answered + 1
    if (response == tolower(crt_correct_answers[j])) n_correct <- n_correct + 1
    if (response == tolower(crt_intuitive_answers[j])) n_intuitive <- n_intuitive + 1
  }

  # Proportions are only defined when at least one item was answered;
  # otherwise both scores stay NA.
  if (n_answered > 0) {
    df$crt_correct[i] <- n_correct / n_answered
    df$crt_int[i] <- n_intuitive / n_answered
  }
}

cat("  CRT correct and intuitive scores calculated.\n\n")

cat("=== PROCESSING COMPLETE ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: picks one random row of `df`, re-derives aot_total (with
# items 4-7 negated) and the two CRT proportions by hand, and prints whether
# each matches the stored column value (tolerance 0.0001).
# Reads globals: df, aot_cols, crt_cols, crt_correct_answers,
# crt_intuitive_answers. No return value; prints only.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # AOT Check
  cat("--- AOT SCALE ---\n")
  cat("Source values (original in CSV):\n")
  aot_original <- numeric(8)
  aot_for_calc <- numeric(8)

  for (i in 1:8) {
    col <- aot_cols[i]
    val <- if (col %in% names(df)) df[random_row, col] else NA
    aot_original[i] <- val

    # Apply reversal for items 4-7
    # NOTE(review): this reverses by position in aot_cols, which is correct
    # only while all 8 columns exist; keep in sync with the scoring step.
    if (i %in% 4:7) {
      aot_for_calc[i] <- val * -1
      cat(sprintf("  %s: %s (reversed to %s for calculation)\n",
                  col,
                  ifelse(is.na(val), "NA", as.character(val)),
                  ifelse(is.na(val), "NA", as.character(val * -1))))
    } else {
      aot_for_calc[i] <- val
      cat(sprintf("  %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
    }
  }

  # Manual calculation check: mean over non-missing (reversed) items,
  # mirroring mean(..., na.rm = TRUE) in the scoring step
  valid_aot <- aot_for_calc[!is.na(aot_for_calc)]
  if (length(valid_aot) > 0) {
    expected_mean <- mean(valid_aot)
    actual_value <- df$aot_total[random_row]
    cat(sprintf("\nCalculation check:\n"))
    cat(sprintf("  Sum of reversed values: %s\n", paste(valid_aot, collapse = " + ")))
    cat(sprintf("  Average of %d valid items: %.5f\n", length(valid_aot), expected_mean))
    cat(sprintf("  Target value (aot_total): %.5f\n", actual_value))
    cat(sprintf("  Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
  } else {
    cat("\n  No valid AOT values to calculate.\n")
  }

  # CRT Check: rescore both answer keys for this row
  cat("\n--- CRT SCALE ---\n")
  cat("Source values:\n")
  crt_correct_count <- 0
  crt_int_count <- 0
  crt_n <- 0

  for (i in 1:3) {
    col <- crt_cols[i]
    val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
    val_trimmed <- trimws(tolower(val))

    correct_ans <- crt_correct_answers[i]
    intuitive_ans <- crt_intuitive_answers[i]

    is_correct <- val_trimmed == tolower(correct_ans)
    is_intuitive <- val_trimmed == tolower(intuitive_ans)

    # NOTE(review): `val_trimmed != ""` is evaluated before the is.na() guard;
    # if val_trimmed were NA this `if` would error. Empty cells arrive as ""
    # because the file is read with na.strings = NULL, so this appears safe —
    # but confirm no genuine NAs can reach here.
    if (val_trimmed != "" && !is.na(val_trimmed)) {
      crt_n <- crt_n + 1
      if (is_correct) crt_correct_count <- crt_correct_count + 1
      if (is_intuitive) crt_int_count <- crt_int_count + 1
    }

    cat(sprintf("  %s: '%s'\n", col, val))
    cat(sprintf("    Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct")))
    cat(sprintf("    Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive")))
  }

  cat("\nCalculation check:\n")
  if (crt_n > 0) {
    expected_correct <- crt_correct_count / crt_n
    expected_int <- crt_int_count / crt_n
    actual_correct <- df$crt_correct[random_row]
    actual_int <- df$crt_int[random_row]

    cat(sprintf("  Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct))
    cat(sprintf("  Target value (crt_correct): %.5f\n", actual_correct))
    cat(sprintf("  Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗")))

    cat(sprintf("\n  Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int))
    cat(sprintf("  Target value (crt_int): %.5f\n", actual_int))
    cat(sprintf("  Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗")))
  } else {
    cat("  No valid CRT responses to calculate.\n")
  }

  cat("\n========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()

# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat("  qa_check_random_row()\n")
cat("\n")

# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
#
# BUG FIX: the write.csv call below was NOT actually commented out, so the
# script silently overwrote eohi2.csv while the messages below claimed the
# write was disabled. Commented out to match the stated review workflow
# (consistent with scripts 04 and 06).
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment the write.csv line above to save changes.\n")
cat("\nProcessing complete! AOT and CRT scales calculated (not yet saved to file).\n")
|
||||||
292
eohi2/dataP 06 - time interval differences.r
Normal file
@ -0,0 +1,292 @@
|
|||||||
|
# Script to calculate absolute differences between time intervals in eohi2.csv
# Compares present vs past/future, and 5-year vs 10-year intervals

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data. check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of NA.
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# The 15 item names (same order for all time periods)
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# The "present" columns spell the TV item in lowercase ("pref_tv");
# everything else is identical to `items`
items_present <- sub("pref_TV", "pref_tv", items, fixed = TRUE)

# All 75 source columns: 15 items x 5 time periods
source_cols <- c(
  paste0("present_", items_present),
  paste0("past_5_", items),
  paste0("past_10_", items),
  paste0("fut_5_", items),
  paste0("fut_10_", items)
)

# The 90 target columns: 6 calculation types x 15 items
target_NPast_5 <- paste0("NPast_5_", items)
target_NPast_10 <- paste0("NPast_10_", items)
target_NFut_5 <- paste0("NFut_5_", items)
target_NFut_10 <- paste0("NFut_10_", items)
target_5_10past <- paste0("5.10past_", items)
target_5_10fut <- paste0("5.10fut_", items)

target_cols <- c(
  target_NPast_5,
  target_NPast_10,
  target_NFut_5,
  target_NFut_10,
  target_5_10past,
  target_5_10fut
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Actual (trimmed) column names present in the data
df_cols <- trimws(names(df))

# Partition the expected source columns into missing / found
missing_source <- setdiff(source_cols, df_cols)
existing_source <- intersect(source_cols, df_cols)

cat("Source Columns:\n")
cat("  Expected: 75 columns\n")
cat("  Found:", length(existing_source), "columns\n")
cat("  Missing:", length(missing_source), "columns\n")

# List missing columns individually only when the list is short
if (length(missing_source) > 0 && length(missing_source) <= 10) {
  cat("\n  Missing source columns:\n")
  cat(paste("    -", missing_source, "\n"), sep = "")
} else if (length(missing_source) > 10) {
  cat("\n  Too many missing to list individually (", length(missing_source), "missing)\n")
}

# Same partition for the target (difference) columns
missing_targets <- setdiff(target_cols, df_cols)
existing_targets <- intersect(target_cols, df_cols)

cat("\nTarget Columns:\n")
cat("  Expected: 90 columns\n")
cat("  Found:", length(existing_targets), "columns\n")
cat("  Missing:", length(missing_targets), "columns\n")

if (length(missing_targets) > 0) {
  cat("\n  Target columns do NOT exist yet - will create them.\n")
  if (length(existing_targets) > 0) {
    cat("  WARNING: Some target columns already exist and will be overwritten.\n")
  }
} else {
  cat("  All target columns exist - will overwrite with calculated values.\n")
}

cat("\n=== END CHECK ===\n\n")

# Abort when too many of the 75 expected source columns are absent
if (length(missing_source) > 30) {
  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# ============= CALCULATE DIFFERENCES =============
cat("Calculating time interval differences...\n")

# Coerce every source column that actually exists to numeric
num_cols <- intersect(source_cols, names(df))
df[num_cols] <- lapply(df[num_cols], as.numeric)

# Elementwise |col1 - col2| across all rows of df. A column absent from df
# contributes NA, so the result is NA wherever either operand is unavailable.
calc_abs_diff <- function(col1, col2) {
  a <- if (col1 %in% names(df)) df[[col1]] else NA
  b <- if (col2 %in% names(df)) df[[col2]] else NA
  abs(a - b)
}
|
||||||
|
|
||||||
|
# One spec per calculation type: progress message, the 15 target columns,
# and the two source-column vectors whose absolute difference fills them.
diff_specs <- list(
  list(msg = "  Calculating NPast_5 differences (present vs past 5 years)...\n",
       targets = target_NPast_5,
       left = paste0("present_", items_present), right = paste0("past_5_", items)),
  list(msg = "  Calculating NPast_10 differences (present vs past 10 years)...\n",
       targets = target_NPast_10,
       left = paste0("present_", items_present), right = paste0("past_10_", items)),
  list(msg = "  Calculating NFut_5 differences (present vs future 5 years)...\n",
       targets = target_NFut_5,
       left = paste0("present_", items_present), right = paste0("fut_5_", items)),
  list(msg = "  Calculating NFut_10 differences (present vs future 10 years)...\n",
       targets = target_NFut_10,
       left = paste0("present_", items_present), right = paste0("fut_10_", items)),
  list(msg = "  Calculating 5.10past differences (past 5 vs past 10 years)...\n",
       targets = target_5_10past,
       left = paste0("past_5_", items), right = paste0("past_10_", items)),
  list(msg = "  Calculating 5.10fut differences (future 5 vs future 10 years)...\n",
       targets = target_5_10fut,
       left = paste0("fut_5_", items), right = paste0("fut_10_", items))
)

# Create the 90 target columns, 15 per calculation type, in the same order
# as the original per-type loops
for (spec in diff_specs) {
  cat(spec$msg)
  for (i in 1:15) {
    df[[spec$targets[i]]] <- calc_abs_diff(spec$left[i], spec$right[i])
  }
}

cat("\n=== CALCULATION COMPLETE ===\n")
cat("  90 difference columns created.\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows and items
|
||||||
|
|
||||||
|
# QA spot-check for the difference columns: for one row and one item, prints
# the two source values of each of the 6 calculation types, recomputes
# |left - right| by hand, and reports whether it matches the stored target
# (tolerance 0.0001).
#
# Arguments (both optional):
#   row_num  - 1-based row to check; NULL picks a random row
#   item_num - 1-based item index (1..15 into `items`); NULL picks a random item
# Reads globals: df, items, items_present, target_NPast_5/10, target_NFut_5/10,
# target_5_10past, target_5_10fut. No return value; prints only.
qa_check_random_row <- function(row_num = NULL, item_num = NULL) {
  # Pick a random row or use specified row (validated against nrow(df))
  if (is.null(row_num)) {
    random_row <- sample(seq_len(nrow(df)), 1)
    cat("\n========================================\n")
    cat("QA CHECK: Random Row #", random_row, "\n")
  } else {
    if (row_num < 1 || row_num > nrow(df)) {
      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
      return()
    }
    random_row <- row_num
    cat("\n========================================\n")
    cat("QA CHECK: Specified Row #", random_row, "\n")
  }

  # Pick a random item or use specified item (validated against 1..15)
  if (is.null(item_num)) {
    test_item_idx <- sample(1:15, 1)
    cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n")
  } else {
    if (item_num < 1 || item_num > 15) {
      cat("ERROR: Item number must be between 1 and 15\n")
      return()
    }
    test_item_idx <- item_num
    cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n")
  }

  cat("========================================\n\n")

  # One entry per calculation type: target column, its two source columns,
  # and a human-readable formula (mirrors the scoring loops above)
  calculations <- list(
    list(name = "NPast_5", target = target_NPast_5[test_item_idx],
         source1 = paste0("present_", items_present[test_item_idx]),
         source2 = paste0("past_5_", items[test_item_idx]),
         desc = "|present - past_5|"),
    list(name = "NPast_10", target = target_NPast_10[test_item_idx],
         source1 = paste0("present_", items_present[test_item_idx]),
         source2 = paste0("past_10_", items[test_item_idx]),
         desc = "|present - past_10|"),
    list(name = "NFut_5", target = target_NFut_5[test_item_idx],
         source1 = paste0("present_", items_present[test_item_idx]),
         source2 = paste0("fut_5_", items[test_item_idx]),
         desc = "|present - fut_5|"),
    list(name = "NFut_10", target = target_NFut_10[test_item_idx],
         source1 = paste0("present_", items_present[test_item_idx]),
         source2 = paste0("fut_10_", items[test_item_idx]),
         desc = "|present - fut_10|"),
    list(name = "5.10past", target = target_5_10past[test_item_idx],
         source1 = paste0("past_5_", items[test_item_idx]),
         source2 = paste0("past_10_", items[test_item_idx]),
         desc = "|past_5 - past_10|"),
    list(name = "5.10fut", target = target_5_10fut[test_item_idx],
         source1 = paste0("fut_5_", items[test_item_idx]),
         source2 = paste0("fut_10_", items[test_item_idx]),
         desc = "|fut_5 - fut_10|")
  )

  for (calc in calculations) {
    cat(sprintf("--- %s ---\n", calc$name))
    cat(sprintf("Formula: %s\n", calc$desc))

    # Source values for this row (NA when a column is absent)
    val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA
    val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA
    target_val <- df[random_row, calc$target]

    cat(sprintf("  %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1))))
    cat(sprintf("  %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2))))

    if (!is.na(val1) && !is.na(val2)) {
      # Recompute the absolute difference and compare float-safely
      expected_diff <- abs(val1 - val2)
      cat(sprintf("\n  Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff))
      cat(sprintf("  Target (%s): %.5f\n", calc$target, target_val))
      cat(sprintf("  Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗")))
    } else {
      cat("  Cannot calculate (missing values)\n")
    }
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run the QA spot-check once on a random row and random item
cat("\n\n")
qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below)

# Usage examples for re-running the check interactively
cat("\n*** TO CHECK ANOTHER ROW/ITEM ***\nFor random row AND random item, run:\n  qa_check_random_row()\n\nFor specific row (e.g., row 118) with random item:\n  qa_check_random_row(118)\n\nFor random row with specific item (e.g., item 5 = pref_travel):\n  qa_check_random_row(item_num = 5)\n\nFor specific row AND specific item:\n  qa_check_random_row(118, 5)\n\n")

# Save the modified dataframe back to CSV.
# na = "" writes NA values as empty cells instead of the text "NA".
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\nReview the output above, then uncomment line 243 to save changes.\n\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n")
|
||||||
265
eohi2/dataP 07 - domain means.r
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
# Script to calculate domain means for time interval differences in eohi2.csv
# Averages the 5 items within each domain (pref, pers, val) for each time interval type

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data. check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of converting to NA.
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# The 15 item names (same order for all time periods)
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# Domain groupings (indices into `items`)
pref_indices <- 1:5
pers_indices <- 6:10
val_indices <- 11:15

# Time interval prefixes
time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut")

# Domain names
domain_names <- c("pref", "pers", "val")

# All 90 source columns: each time prefix crossed with each item, in
# time-prefix-major order (same names and order as an explicit listing).
source_cols <- unlist(lapply(time_prefixes, function(p) paste0(p, "_", items)))

# All 18 target columns (6 time intervals x 3 domains), same ordering scheme.
target_cols <- unlist(lapply(time_prefixes, function(p) paste0(p, "_", domain_names, "_MEAN")))
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Actual column names from the dataframe (trimmed of stray whitespace)
df_cols <- trimws(names(df))

# Source columns: set operations give the same (order-preserving) result as
# the original %in% masks because source_cols has no duplicates.
missing_source <- setdiff(source_cols, df_cols)
existing_source <- intersect(source_cols, df_cols)

cat("Source Columns:\n")
cat(" Expected: 90 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")

if (length(missing_source) > 0 && length(missing_source) <= 20) {
  cat("\n Missing source columns:\n")
  for (col in missing_source) {
    cat(" -", col, "\n")
  }
} else if (length(missing_source) > 20) {
  cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
}

# Target columns
missing_targets <- setdiff(target_cols, df_cols)
existing_targets <- intersect(target_cols, df_cols)

cat("\nTarget Columns:\n")
cat(" Expected: 18 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")

if (length(missing_targets) > 0) {
  cat("\n Target columns do NOT exist yet - will create them.\n")
  if (length(existing_targets) > 0) {
    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
  }
} else {
  cat(" All target columns exist - will overwrite with calculated values.\n")
}

cat("\n=== END CHECK ===\n\n")

# Abort when more than half of the 90 source columns are absent -- the CSV
# almost certainly has different column names than expected.
if (length(missing_source) > 45) {
  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# ============= CALCULATE DOMAIN MEANS =============
cat("Calculating domain means for time interval differences...\n")

# Convert source columns to numeric (they may have been read as character
# because the CSV was read with na.strings = NULL).
for (col in source_cols) {
  if (col %in% names(df)) {
    df[[col]] <- as.numeric(df[[col]])
  }
}

# Item indices for each domain, keyed by domain name.
domain_index_map <- list(pref = pref_indices, pers = pers_indices, val = val_indices)

# One mean column per time interval x domain. The inner loop replaces the
# three near-identical copy-pasted pref/pers/val blocks of the original;
# processing order (pref, pers, val within each prefix) is unchanged.
for (time_prefix in time_prefixes) {
  for (domain in names(domain_index_map)) {
    domain_cols <- paste0(time_prefix, "_", items[domain_index_map[[domain]]])
    existing_domain_cols <- domain_cols[domain_cols %in% names(df)]
    if (length(existing_domain_cols) > 0) {
      target <- paste0(time_prefix, "_", domain, "_MEAN")
      # Row-wise mean over the (up to 5) available item columns; NA items ignored.
      df[[target]] <- rowMeans(df[, existing_domain_cols, drop = FALSE], na.rm = TRUE)
      cat(" Processed:", target, "\n")
    }
  }
}

cat("\n=== CALCULATION COMPLETE ===\n")
cat(" 18 domain mean columns created.\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK =============
# This function can be run multiple times to check different random rows and time intervals.
#
# For one row and one time interval, recomputes each domain mean from the raw
# item columns and compares it against the stored *_MEAN column.
#   row_num           - 1-based row to check; NULL picks a random row.
#   time_interval_num - 1..6 index into time_prefixes; NULL picks one at random.
# Relies on globals: df, time_prefixes, domain_names, items, pref/pers/val_indices.

qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) {
  # Pick a random row or use the specified row
  if (is.null(row_num)) {
    random_row <- sample(seq_len(nrow(df)), 1)
    cat("\n========================================\n")
    cat("QA CHECK: Random Row #", random_row, "\n")
  } else {
    if (row_num < 1 || row_num > nrow(df)) {
      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
      return()
    }
    random_row <- row_num
    cat("\n========================================\n")
    cat("QA CHECK: Specified Row #", random_row, "\n")
  }

  # Pick a random time interval or use the specified interval
  if (is.null(time_interval_num)) {
    test_interval_idx <- sample(1:6, 1)
    cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n")
  } else {
    if (time_interval_num < 1 || time_interval_num > 6) {
      cat("ERROR: Time interval number must be between 1 and 6\n")
      cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n")
      return()
    }
    test_interval_idx <- time_interval_num
    cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n")
  }

  cat("========================================\n\n")

  time_prefix <- time_prefixes[test_interval_idx]

  # Check each of the 3 domains
  for (domain_idx in 1:3) {
    domain_name <- domain_names[domain_idx]

    # Map the domain to its item indices and display label (switch replaces
    # the original if/else chain; same mapping).
    item_indices <- switch(domain_name,
                           pref = pref_indices,
                           pers = pers_indices,
                           val = val_indices)
    domain_label <- switch(domain_name,
                           pref = "Preferences",
                           pers = "Personality",
                           val = "Values")

    cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label))

    # Source and target column names for this interval x domain
    source_cols_domain <- paste0(time_prefix, "_", items[item_indices])
    target_col <- paste0(time_prefix, "_", domain_name, "_MEAN")

    # Collect the 5 raw item values (NA when the column is absent)
    values <- numeric(5)
    cat("Source values:\n")
    for (i in 1:5) {
      col <- source_cols_domain[i]
      val <- if (col %in% names(df)) df[random_row, col] else NA
      values[i] <- val
      cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val))))
    }

    # Recompute the expected mean over the non-missing items
    valid_values <- values[!is.na(values)]
    if (length(valid_values) > 0) {
      expected_mean <- mean(valid_values)
      # FIX: guard against a missing target column -- indexing a data.frame
      # with an undefined column name is an error ("undefined columns
      # selected"), not NA, so the original line crashed when the *_MEAN
      # column had not been created yet.
      actual_value <- if (target_col %in% names(df)) df[random_row, target_col] else NA

      cat(sprintf("\nCalculation:\n"))
      cat(sprintf(" Sum: %s = %.5f\n",
                  paste(sprintf("%.5f", valid_values), collapse = " + "),
                  sum(valid_values)))
      cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean))
      cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value))
      cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
    } else {
      cat(" No valid values to calculate mean.\n")
    }
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on random row and random time interval
cat("\n\n")
qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below)

# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n")
cat("For random row AND random time interval, run:\n")
cat(" qa_check_random_row()\n")
cat("\nFor specific row (e.g., row 118) with random interval:\n")
cat(" qa_check_random_row(118)\n")
cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n")
cat(" qa_check_random_row(time_interval_num = 3)\n")
cat("\nFor specific row AND specific interval:\n")
cat(" qa_check_random_row(118, 3)\n")
cat("\n")
cat("Time Interval Numbers:\n")
cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n")
cat(" 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n")
cat("\n")

# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# FIX: write.csv() was left active even though the comment above and all of
# the messages below state the write is commented out; running the script
# would silently overwrite eohi2.csv before review. Commented out to match
# the stated intent.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment the write.csv() line above to save changes.\n")
cat("\nProcessing complete! 18 domain mean columns calculated (not yet saved to file).\n")
|
||||||
95
eohi2/dataP 08 - DGEN 510 vars.r
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
# Script 08: Create 5_10 DGEN Variables
# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings
# for both Past and Future directions
# VARIABLES CREATED: 6 total (3 domains x 2 time directions)

library(tidyverse)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data
data <- read.csv("eohi2.csv")

# FIX: collapse was " x" (produced e.g. "318 x77"); " x " gives readable dims.
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$ResponseId))))

# Verify source columns exist
source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
                 "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
                 "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
                 "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")

missing_vars <- source_vars[!source_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", ")))
}

print("All source DGEN variables found!")

# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year)
# Formula: |DGEN_5 - DGEN_10|
# NOTE: Using X prefix because R adds it to column names starting with numbers

# PAST direction
data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref)
data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers)
data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val)

# FUTURE direction
data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref)
data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers)
data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val)

# Verify variables were created
target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
                 "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")

# FIX: print("\n...") does not interpret the escape -- it renders the literal
# characters \n inside a quoted [1] "..." line. The section headers below use
# cat() so the intended blank lines actually appear.
cat("\n=== VARIABLES CREATED ===\n")
print(target_vars)

# Check for missing values
for (var in target_vars) {
  n_missing <- sum(is.na(data[[var]]))
  pct_missing <- round(100 * n_missing / nrow(data), 2)
  print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing))
}

# Quality check: display recomputed vs stored values for a few random rows
cat("\n=== QUALITY CHECK: Sample Calculations ===\n")
sample_rows <- sample(1:nrow(data), min(5, nrow(data)))

for (i in sample_rows) {
  cat(sprintf("\nRow %d:\n", i))
  print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f",
                data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i]))
  print(sprintf(" → X5_10DGEN_past_pref = %.2f (expected: %.2f)",
                data$X5_10DGEN_past_pref[i],
                abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i])))

  print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f",
                data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i]))
  print(sprintf(" → X5_10DGEN_fut_pers = %.2f (expected: %.2f)",
                data$X5_10DGEN_fut_pers[i],
                abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i])))
}

# Descriptive statistics
cat("\n=== DESCRIPTIVE STATISTICS ===\n")
desc_stats <- data %>%
  summarise(across(all_of(target_vars),
                   list(n = ~sum(!is.na(.)),
                        mean = ~round(mean(., na.rm = TRUE), 5),
                        sd = ~round(sd(., na.rm = TRUE), 5),
                        min = ~round(min(., na.rm = TRUE), 5),
                        max = ~round(max(., na.rm = TRUE), 5)),
                   .names = "{.col}_{.fn}"))

print(t(desc_stats))

# Save to CSV
write.csv(data, "eohi2.csv", row.names = FALSE)

cat("\n=== PROCESSING COMPLETE ===\n")
print("Data saved to eohi2.csv")
print(paste("Total columns now:", ncol(data)))
|
||||||
|
|
||||||
223
eohi2/dataP 09 - interval x direction means.r
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# The 15 items rated at every time interval x direction. Building the column
# vectors from this list replaces ~90 lines of copy-pasted name literals in
# the original; names and order are identical.
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# Helper: the 15 item columns for each prefix in `prefixes`, prefix-major order.
interval_cols <- function(prefixes) {
  unlist(lapply(prefixes, function(p) paste0(p, "_", items)))
}

# Sets 1-6: one mean per interval x direction (15 variables each)
for (p in c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut")) {
  data[[paste0(p, "_mean")]] <- rowMeans(data[, interval_cols(p)], na.rm = TRUE)
}

# Sets 7-11: global means over pairs of intervals (30 variables each)
global_sets <- list(
  NPast_global_mean = c("NPast_5", "NPast_10"),
  NFut_global_mean  = c("NFut_5", "NFut_10"),
  X5.10_global_mean = c("X5.10past", "X5.10fut"),
  N5_global_mean    = c("NPast_5", "NFut_5"),
  N10_global_mean   = c("NPast_10", "NFut_10")
)
for (nm in names(global_sets)) {
  data[[nm]] <- rowMeans(data[, interval_cols(global_sets[[nm]])], na.rm = TRUE)
}

# Save the data
write.csv(data, "eohi2.csv", row.names = FALSE)
|
||||||
|
|
||||||
|
# ===== QA CODE: Check first 5 rows =====
cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n")

# Rebuild the column lists used for each derived mean so every stored value
# can be recomputed independently. The data-driven loop replaces the 11
# copy-pasted calc1..calc11 chunks of the original; output is identical.
qa_items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)
qa_cols <- function(prefixes) {
  unlist(lapply(prefixes, function(p) paste0(p, "_", qa_items)))
}
# Derived-mean column -> the interval prefix(es) it averages over.
qa_sets <- list(
  NPast_5_mean      = "NPast_5",
  NPast_10_mean     = "NPast_10",
  NFut_5_mean       = "NFut_5",
  NFut_10_mean      = "NFut_10",
  X5.10past_mean    = "X5.10past",
  X5.10fut_mean     = "X5.10fut",
  NPast_global_mean = c("NPast_5", "NPast_10"),
  NFut_global_mean  = c("NFut_5", "NFut_10"),
  X5.10_global_mean = c("X5.10past", "X5.10fut"),
  N5_global_mean    = c("NPast_5", "NFut_5"),
  N10_global_mean   = c("NPast_10", "NFut_10")
)

for (i in 1:min(5, nrow(data))) {
  cat("--- Row", i, "---\n")
  for (set_name in names(qa_sets)) {
    # Recompute the row mean from the raw columns and compare to the stored value.
    calc <- mean(as.numeric(data[i, qa_cols(qa_sets[[set_name]])]), na.rm = TRUE)
    stored <- data[[set_name]][i]
    cat(paste0(set_name, ": Calculated ="), calc, "| Stored =", stored,
        "| Match:", isTRUE(all.equal(calc, stored)), "\n")
  }
  cat("\n")
}

cat("=== QA CHECK COMPLETE ===\n")
|
||||||
115
eohi2/dataP 10 - DGEN mean vars.r
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
# dataP 10 - DGEN mean vars
# Builds seven DGEN composite mean variables from eohi2.csv, verifies the
# first five rows against independently re-computed means, and writes the
# augmented dataset back to eohi2.csv.

options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# One entry per composite: target column name -> component column names.
# Defining the specification once removes the duplication between the
# computation code and the QA code (previously every column list was
# written out twice and could silently drift apart).
dgen_sets <- list(
  # Set 1: DGEN_past_5.10_mean (3 variables)
  DGEN_past_5.10_mean = c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val"),
  # Set 2: DGEN_fut_5.10_mean (3 variables)
  DGEN_fut_5.10_mean = c("X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val"),
  # Set 3: DGENpast_global_mean (9 variables)
  DGENpast_global_mean = c("DGEN_past_5_Pref", "DGEN_past_10_Pref", "X5_10DGEN_past_pref",
                           "DGEN_past_5_Pers", "DGEN_past_10_Pers", "X5_10DGEN_past_pers",
                           "DGEN_past_5_Val", "DGEN_past_10_Val", "X5_10DGEN_past_val"),
  # Set 4: DGENfut_global_mean (9 variables)
  DGENfut_global_mean = c("DGEN_fut_5_Pref", "DGEN_fut_10_Pref", "X5_10DGEN_fut_pref",
                          "DGEN_fut_5_Pers", "DGEN_fut_10_Pers", "X5_10DGEN_fut_pers",
                          "DGEN_fut_5_Val", "DGEN_fut_10_Val", "X5_10DGEN_fut_val"),
  # Set 5: DGEN_5_global_mean (6 variables)
  DGEN_5_global_mean = c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
                         "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val"),
  # Set 6: DGEN_10_global_mean (6 variables)
  DGEN_10_global_mean = c("DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
                          "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"),
  # Set 7: DGEN_5.10_global_mean (6 variables)
  DGEN_5.10_global_mean = c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
                            "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")
)

# Compute each composite as the row-wise mean of its components, ignoring
# missing values (rowMeans with na.rm = TRUE yields NaN for all-NA rows,
# matching the original per-set calls).
for (target in names(dgen_sets)) {
  data[[target]] <- rowMeans(data[, dgen_sets[[target]]], na.rm = TRUE)
}

# Save the data (NOTE(review): this overwrites the input file in place,
# as the original script did).
write.csv(data, "eohi2.csv", row.names = FALSE)

# ===== QA CODE: Check first 5 rows =====
cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n")

for (i in seq_len(min(5, nrow(data)))) {
  cat("--- Row", i, "---\n")
  for (target in names(dgen_sets)) {
    # Recompute the mean independently from the raw component columns
    # for this single row, exactly as the original per-set QA did.
    calc <- mean(as.numeric(data[i, dgen_sets[[target]]]), na.rm = TRUE)
    cat(paste0(target, ":"), "Calculated =", calc, "| Stored =", data[[target]][i],
        "| Match:", isTRUE(all.equal(calc, data[[target]][i])), "\n")
  }
  cat("\n")
}

cat("=== QA CHECK COMPLETE ===\n")
|
||||||
235
eohi2/dataP 11 - CORRECT ehi vars.r
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
# dataP 11 - CORRECT ehi vars
# Creates EHI difference scores (past minus future) for every item in the
# 5-year, 10-year, and 5-10-year intervals of eohi2.csv, QA-checks them,
# and writes the augmented dataset back to eohi2.csv.

options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Item stems shared by every time interval
# (5 preferences, 5 personality items, 5 values).
items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
           "pers_extravert", "pers_critical", "pers_dependable",
           "pers_anxious", "pers_complex",
           "val_obey", "val_trad", "val_opinion", "val_performance",
           "val_justice")

# Build past/future/target column-name triples for one time interval.
make_pairs <- function(past_fmt, fut_fmt, target_fmt) {
  lapply(items, function(it) {
    list(npast = sprintf(past_fmt, it),
         nfut = sprintf(fut_fmt, it),
         target = sprintf(target_fmt, it))
  })
}

# The full pair specification, defined ONCE.  Both the computation and the
# QA below are driven from this list, so the 45 difference variables and
# the 45 QA checks cannot drift apart (previously each assignment and each
# qa_pairs entry was written out by hand in two separate places).
qa_pairs <- c(
  make_pairs("NPast_5_%s", "NFut_5_%s", "ehi5_%s"),        # 5-year pairs
  make_pairs("NPast_10_%s", "NFut_10_%s", "ehi10_%s"),     # 10-year pairs
  make_pairs("X5.10past_%s", "X5.10fut_%s", "ehi5.10_%s")  # 5-10 year change pairs
)

# Compute every EHI difference variable (past minus future).
for (pair in qa_pairs) {
  data[[pair$target]] <- data[[pair$npast]] - data[[pair$nfut]]
}

# QA: detailed spot check of three representative items for the first
# 5 rows of each interval, matching the original hand-written output.
spot_items <- c("pref_read", "pref_music", "pers_extravert")

print_spot_checks <- function(past_fmt, fut_fmt, target_fmt) {
  for (i in 1:5) {
    cat(sprintf("\nRow %d:\n", i))
    for (it in spot_items) {
      past_v <- data[[sprintf(past_fmt, it)]][i]
      fut_v <- data[[sprintf(fut_fmt, it)]][i]
      tname <- sprintf(target_fmt, it)
      stored <- data[[tname]][i]
      mark <- ifelse(abs((past_v - fut_v) - stored) < 1e-10, "\u2713", "\u2717")
      cat(sprintf("  %s: %g - %g = %g | %s = %g %s\n",
                  it, past_v, fut_v, past_v - fut_v, tname, stored, mark))
    }
  }
}

cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")

cat("--- 5-YEAR VARIABLES ---\n")
print_spot_checks("NPast_5_%s", "NFut_5_%s", "ehi5_%s")

cat("\n--- 10-YEAR VARIABLES ---\n")
print_spot_checks("NPast_10_%s", "NFut_10_%s", "ehi10_%s")

cat("\n--- 5-10 YEAR CHANGE VARIABLES ---\n")
print_spot_checks("X5.10past_%s", "X5.10fut_%s", "ehi5.10_%s")

# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

all_checks_passed <- TRUE

for (pair in qa_pairs) {
  # Expected difference, recomputed from the raw columns.
  expected_diff <- data[[pair$npast]] - data[[pair$nfut]]

  # Stored value in the target variable.
  actual_value <- data[[pair$target]]

  # Compare with a tolerance for floating point precision; which() drops
  # NAs, so rows with missing inputs are not flagged (same behavior as
  # the original check).
  discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf("  Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))

    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf("  Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
                row_num,
                pair$npast, data[[pair$npast]][row_num],
                pair$nfut, data[[pair$nfut]][row_num],
                expected_diff[row_num],
                pair$target, actual_value[row_num]))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")
|
||||||
118
eohi2/datap 12 - CORRECT DGEN ehi vars.r
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
# datap 12 - CORRECT DGEN ehi vars
# Creates DGEN EHI difference scores (past minus future) for the 5-year
# and 10-year Pref/Pers/Val composites, QA-checks them, and writes the
# augmented dataset back to eohi2.csv.

options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# One entry per difference variable.  Both the computation and the QA
# below are driven from this single list, so they cannot drift apart
# (previously the six assignments and the QA list were maintained
# separately by hand).
qa_pairs <- list(
  # 5-year DGEN pairs
  list(npast = "DGEN_past_5_Pref", nfut = "DGEN_fut_5_Pref", target = "ehiDGEN_5_Pref"),
  list(npast = "DGEN_past_5_Pers", nfut = "DGEN_fut_5_Pers", target = "ehiDGEN_5_Pers"),
  list(npast = "DGEN_past_5_Val", nfut = "DGEN_fut_5_Val", target = "ehiDGEN_5_Val"),

  # 10-year DGEN pairs
  list(npast = "DGEN_past_10_Pref", nfut = "DGEN_fut_10_Pref", target = "ehiDGEN_10_Pref"),
  list(npast = "DGEN_past_10_Pers", nfut = "DGEN_fut_10_Pers", target = "ehiDGEN_10_Pers"),
  list(npast = "DGEN_past_10_Val", nfut = "DGEN_fut_10_Val", target = "ehiDGEN_10_Val")
)

# Compute every DGEN EHI difference variable (past minus future).
for (pair in qa_pairs) {
  data[[pair$target]] <- data[[pair$npast]] - data[[pair$nfut]]
}

# QA: detailed spot check for the first 5 rows of a group of pairs,
# matching the original hand-written output.
print_spot_checks <- function(pairs) {
  for (i in 1:5) {
    cat(sprintf("\nRow %d:\n", i))
    for (pair in pairs) {
      past_v <- data[[pair$npast]][i]
      fut_v <- data[[pair$nfut]][i]
      stored <- data[[pair$target]][i]
      mark <- ifelse(abs((past_v - fut_v) - stored) < 1e-10, "\u2713", "\u2717")
      # Label the line with the domain suffix (Pref/Pers/Val).
      dom <- sub(".*_", "", pair$target)
      cat(sprintf("  %s: %g - %g = %g | %s = %g %s\n",
                  dom, past_v, fut_v, past_v - fut_v, pair$target, stored, mark))
    }
  }
}

cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")

cat("--- 5-YEAR DGEN VARIABLES ---\n")
print_spot_checks(qa_pairs[1:3])

cat("\n--- 10-YEAR DGEN VARIABLES ---\n")
print_spot_checks(qa_pairs[4:6])

# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

all_checks_passed <- TRUE

for (pair in qa_pairs) {
  # Expected difference, recomputed from the raw columns.
  expected_diff <- data[[pair$npast]] - data[[pair$nfut]]

  # Stored value in the target variable.
  actual_value <- data[[pair$target]]

  # Compare with a tolerance for floating point precision; which() drops
  # NAs, so rows with missing inputs are not flagged (same behavior as
  # the original check).
  discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf("  Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))

    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf("  Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
                row_num,
                pair$npast, data[[pair$npast]][row_num],
                pair$nfut, data[[pair$nfut]][row_num],
                expected_diff[row_num],
                pair$target, actual_value[row_num]))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")
|
||||||
161
eohi2/datap 13 - ehi domain specific means.r
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Load data
|
||||||
|
data <- read.csv("eohi2.csv")
|
||||||
|
|
||||||
|
# Calculate domain-specific mean scores for EHI variables across time intervals
|
||||||
|
|
||||||
|
# === 5-YEAR MEANS ===
|
||||||
|
data$ehi5_pref_MEAN <- rowMeans(data[, c("ehi5_pref_read", "ehi5_pref_music",
|
||||||
|
"ehi5_pref_TV", "ehi5_pref_nap",
|
||||||
|
"ehi5_pref_travel")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi5_pers_MEAN <- rowMeans(data[, c("ehi5_pers_extravert", "ehi5_pers_critical",
|
||||||
|
"ehi5_pers_dependable", "ehi5_pers_anxious",
|
||||||
|
"ehi5_pers_complex")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi5_val_MEAN <- rowMeans(data[, c("ehi5_val_obey", "ehi5_val_trad",
|
||||||
|
"ehi5_val_opinion", "ehi5_val_performance",
|
||||||
|
"ehi5_val_justice")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# === 10-YEAR MEANS ===
|
||||||
|
data$ehi10_pref_MEAN <- rowMeans(data[, c("ehi10_pref_read", "ehi10_pref_music",
|
||||||
|
"ehi10_pref_TV", "ehi10_pref_nap",
|
||||||
|
"ehi10_pref_travel")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi10_pers_MEAN <- rowMeans(data[, c("ehi10_pers_extravert", "ehi10_pers_critical",
|
||||||
|
"ehi10_pers_dependable", "ehi10_pers_anxious",
|
||||||
|
"ehi10_pers_complex")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi10_val_MEAN <- rowMeans(data[, c("ehi10_val_obey", "ehi10_val_trad",
|
||||||
|
"ehi10_val_opinion", "ehi10_val_performance",
|
||||||
|
"ehi10_val_justice")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# === 5-10 YEAR CHANGE MEANS ===
|
||||||
|
data$ehi5.10_pref_MEAN <- rowMeans(data[, c("ehi5.10_pref_read", "ehi5.10_pref_music",
|
||||||
|
"ehi5.10_pref_TV", "ehi5.10_pref_nap",
|
||||||
|
"ehi5.10_pref_travel")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi5.10_pers_MEAN <- rowMeans(data[, c("ehi5.10_pers_extravert", "ehi5.10_pers_critical",
|
||||||
|
"ehi5.10_pers_dependable", "ehi5.10_pers_anxious",
|
||||||
|
"ehi5.10_pers_complex")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# === 5-10 YEAR CHANGE: VALUES MEAN ===
# Row-wise mean of the five 5-10-year change value items (NAs ignored).
data$ehi5.10_val_MEAN <- rowMeans(
  data[, c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion",
           "ehi5.10_val_performance", "ehi5.10_val_justice")],
  na.rm = TRUE)

# QA: Verify mean calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI domain-specific mean calculations\n\n")

# Spot-check helper: for each of the first five rows, recompute the mean of
# the component items and compare it to the stored derived column, printing
# one line per row with a ✓/✗ verdict.
preview_first_rows <- function(header, items, target) {
  cat(header)
  for (row in 1:5) {
    components <- vapply(items, function(v) data[[v]][row], numeric(1))
    recomputed <- mean(components, na.rm = TRUE)
    stored     <- data[[target]][row]
    ok <- abs(recomputed - stored) < 1e-10
    cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
                row, components[1], components[2], components[3],
                components[4], components[5],
                recomputed, stored, ifelse(ok, "✓", "✗")))
  }
}

preview_first_rows("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n",
                   c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
                     "ehi5_pref_nap", "ehi5_pref_travel"),
                   "ehi5_pref_MEAN")

preview_first_rows("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n",
                   c("ehi5_pers_extravert", "ehi5_pers_critical",
                     "ehi5_pers_dependable", "ehi5_pers_anxious",
                     "ehi5_pers_complex"),
                   "ehi5_pers_MEAN")

preview_first_rows("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n",
                   c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV",
                     "ehi10_pref_nap", "ehi10_pref_travel"),
                   "ehi10_pref_MEAN")

preview_first_rows("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n",
                   c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV",
                     "ehi5.10_pref_nap", "ehi5.10_pref_travel"),
                   "ehi5.10_pref_MEAN")

# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Small constructor so each check below reads compactly.
qa_spec <- function(vars, target, name) list(vars = vars, target = target, name = name)

qa_checks <- list(
  # 5-year means
  qa_spec(c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
            "ehi5_pref_nap", "ehi5_pref_travel"),
          "ehi5_pref_MEAN", "5-Year Preferences"),
  qa_spec(c("ehi5_pers_extravert", "ehi5_pers_critical", "ehi5_pers_dependable",
            "ehi5_pers_anxious", "ehi5_pers_complex"),
          "ehi5_pers_MEAN", "5-Year Personality"),
  qa_spec(c("ehi5_val_obey", "ehi5_val_trad", "ehi5_val_opinion",
            "ehi5_val_performance", "ehi5_val_justice"),
          "ehi5_val_MEAN", "5-Year Values"),

  # 10-year means
  qa_spec(c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV",
            "ehi10_pref_nap", "ehi10_pref_travel"),
          "ehi10_pref_MEAN", "10-Year Preferences"),
  qa_spec(c("ehi10_pers_extravert", "ehi10_pers_critical", "ehi10_pers_dependable",
            "ehi10_pers_anxious", "ehi10_pers_complex"),
          "ehi10_pers_MEAN", "10-Year Personality"),
  qa_spec(c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion",
            "ehi10_val_performance", "ehi10_val_justice"),
          "ehi10_val_MEAN", "10-Year Values"),

  # 5-10 year change means
  qa_spec(c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV",
            "ehi5.10_pref_nap", "ehi5.10_pref_travel"),
          "ehi5.10_pref_MEAN", "5-10 Year Change Preferences"),
  qa_spec(c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable",
            "ehi5.10_pers_anxious", "ehi5.10_pers_complex"),
          "ehi5.10_pers_MEAN", "5-10 Year Change Personality"),
  qa_spec(c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion",
            "ehi5.10_val_performance", "ehi5.10_val_justice"),
          "ehi5.10_val_MEAN", "5-10 Year Change Values")
)

all_checks_passed <- TRUE

for (check in qa_checks) {
  expected <- rowMeans(data[, check$vars], na.rm = TRUE)
  stored   <- data[[check$target]]
  # NOTE(review): which() drops NA comparisons, so rows where either side is
  # NA/NaN are not flagged — same as the original behavior.
  bad_rows <- which(abs(expected - stored) > 1e-10)

  if (length(bad_rows) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                check$name, length(check$vars), nrow(data)))
  } else {
    cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars)))
    cat(sprintf(" Found %d discrepancies in rows: %s\n",
                length(bad_rows), paste(head(bad_rows, 10), collapse = ", ")))
    all_checks_passed <- FALSE
  }
}

cat("\n")
cat(if (all_checks_passed) "*** ALL QA CHECKS PASSED ***\n"
    else "*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")

# Save updated dataset
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")
|
||||||
140
eohi2/datap 14 - all ehi global means.r
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Calculate global mean scores for EHI variables across time intervals.
# Each global mean is the row-wise mean of three component columns (NAs ignored).

# === DGEN 5-YEAR GLOBAL MEAN ===
data$ehiDGEN_5_mean <- rowMeans(
  data[, c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val")], na.rm = TRUE)

# === DGEN 10-YEAR GLOBAL MEAN ===
data$ehiDGEN_10_mean <- rowMeans(
  data[, c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val")], na.rm = TRUE)

# === 5-YEAR GLOBAL MEAN ===
data$ehi5_global_mean <- rowMeans(
  data[, c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN")], na.rm = TRUE)

# === 10-YEAR GLOBAL MEAN ===
data$ehi10_global_mean <- rowMeans(
  data[, c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN")], na.rm = TRUE)

# === 5-10 YEAR CHANGE GLOBAL MEAN ===
data$ehi5.10_global_mean <- rowMeans(
  data[, c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN")], na.rm = TRUE)

# QA: Verify mean calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI global mean calculations\n\n")

# Spot-check helper: recompute each of the first five rows' global mean from
# its three components and compare against the stored column, printing ✓/✗.
# value_fmt controls how the raw components are rendered ("%g" or "%.5f").
preview_first_rows <- function(header, items, target, value_fmt) {
  cat(header)
  for (row in 1:5) {
    components <- vapply(items, function(v) data[[v]][row], numeric(1))
    shown      <- paste(sprintf(value_fmt, components), collapse = ", ")
    recomputed <- mean(components, na.rm = TRUE)
    stored     <- data[[target]][row]
    ok <- abs(recomputed - stored) < 1e-10
    cat(sprintf("Row %d: [%s] → Calculated: %.5f | Actual: %.5f %s\n",
                row, shown, recomputed, stored, ifelse(ok, "✓", "✗")))
  }
}

preview_first_rows("--- FIRST 5 ROWS: DGEN 5-YEAR GLOBAL MEAN ---\n",
                   c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
                   "ehiDGEN_5_mean", "%g")

preview_first_rows("\n--- FIRST 5 ROWS: DGEN 10-YEAR GLOBAL MEAN ---\n",
                   c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
                   "ehiDGEN_10_mean", "%g")

preview_first_rows("\n--- FIRST 5 ROWS: 5-YEAR GLOBAL MEAN ---\n",
                   c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
                   "ehi5_global_mean", "%.5f")

preview_first_rows("\n--- FIRST 5 ROWS: 10-YEAR GLOBAL MEAN ---\n",
                   c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
                   "ehi10_global_mean", "%.5f")

preview_first_rows("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE GLOBAL MEAN ---\n",
                   c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"),
                   "ehi5.10_global_mean", "%.5f")

# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Small constructor so each check below reads compactly.
qa_spec <- function(vars, target, name) list(vars = vars, target = target, name = name)

qa_checks <- list(
  # DGEN global means
  qa_spec(c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
          "ehiDGEN_5_mean", "DGEN 5-Year Global"),
  qa_spec(c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
          "ehiDGEN_10_mean", "DGEN 10-Year Global"),

  # Domain-specific global means
  qa_spec(c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
          "ehi5_global_mean", "5-Year Global"),
  qa_spec(c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
          "ehi10_global_mean", "10-Year Global"),
  qa_spec(c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"),
          "ehi5.10_global_mean", "5-10 Year Change Global")
)

all_checks_passed <- TRUE

for (check in qa_checks) {
  expected <- rowMeans(data[, check$vars], na.rm = TRUE)
  stored   <- data[[check$target]]
  # NOTE(review): which() drops NA comparisons, so rows where either side is
  # NA/NaN are not flagged — same as the original behavior.
  bad_rows <- which(abs(expected - stored) > 1e-10)

  if (length(bad_rows) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                check$name, length(check$vars), nrow(data)))
  } else {
    cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars)))
    cat(sprintf(" Found %d discrepancies in rows: %s\n",
                length(bad_rows), paste(head(bad_rows, 10), collapse = ", ")))
    all_checks_passed <- FALSE
  }
}

cat("\n")
cat(if (all_checks_passed) "*** ALL QA CHECKS PASSED ***\n"
    else "*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")

# Save updated dataset
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")
|
||||||
38
eohi2/datap 15 - education recoded ordinal 3.r
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

data <- read.csv("eohi2.csv")

# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))

# Also show the unique values and their frequencies.
# FIX: the original used print("\n...") — print() does not interpret escape
# sequences, so it emitted a literal `\n` and surrounding quotes; cat() renders
# the newline as intended.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))

# Recode demo_edu into 3 ordinal levels:
#   HS_TS     - High School and Trade School
#   C_Ug      - College and University undergraduate
#   grad_prof - Graduate (Masters), PhD, and professional degrees
# Any demo_edu value not listed in the table below (including NA) stays NA,
# matching the original %in%-based assignments.
edu3_map <- c(
  "High School (or equivalent)"     = "HS_TS",
  "Trade School (non-military)"     = "HS_TS",
  "College Diploma/Certificate"     = "C_Ug",
  "University - Undergraduate"      = "C_Ug",
  "University - Graduate (Masters)" = "grad_prof",
  "University - PhD"                = "grad_prof",
  "Professional Degree (ex. JD/MD)" = "grad_prof"
)

# as.character() guards against demo_edu arriving as a factor, in which case
# indexing edu3_map by it would use the factor's integer codes, not its labels.
data$edu3 <- factor(unname(edu3_map[as.character(data$demo_edu)]),
                    levels = c("HS_TS", "C_Ug", "grad_prof"),
                    ordered = TRUE)

# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))

# Verify the recoding (cross-tabulate original against recoded values)
print(table(data$demo_edu, data$edu3, useNA = "ifany"))

# Save the updated dataset with the new edu3 variable
write.csv(data, "eohi2.csv", row.names = FALSE)
|
||||||
100
eohi2/datap 16 - ehi vars standardized .r
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

df <- read.csv("eohi2.csv")

# Display means and standard deviations of non-standardized variables for manual checking
print(round(mean(df$ehiDGEN_5_mean, na.rm = TRUE), 5))
print(round(sd(df$ehiDGEN_5_mean, na.rm = TRUE), 5))
print(round(mean(df$ehiDGEN_10_mean, na.rm = TRUE), 5))
print(round(sd(df$ehiDGEN_10_mean, na.rm = TRUE), 5))
print(round(mean(df$ehi5_global_mean, na.rm = TRUE), 5))
print(round(sd(df$ehi5_global_mean, na.rm = TRUE), 5))
print(round(mean(df$ehi10_global_mean, na.rm = TRUE), 5))
print(round(sd(df$ehi10_global_mean, na.rm = TRUE), 5))

# Calculate means and standard deviations for standardization
mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE)
sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE)

mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE)
sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE)

mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE)
sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE)

mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE)
sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE)

# Create standardized variables (z-scores: subtract sample mean, divide by sample SD)
df$stdDGEN_5 <- (df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5
df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10
df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5
df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10

# Check that variables have been standardized (means ≈ 0, SDs ≈ 1)
print(round(mean(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(mean(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(mean(df$stdDS_5, na.rm = TRUE), 5))
print(round(sd(df$stdDS_5, na.rm = TRUE), 5))
print(round(mean(df$stdDS_10, na.rm = TRUE), 5))
print(round(sd(df$stdDS_10, na.rm = TRUE), 5))

# Calculate mean of standardized variables
df$stdEHI_mean <- rowMeans(df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")], na.rm = TRUE)

#### check random 10 rows

# Check 10 random rows to verify calculations
set.seed(123) # For reproducible random selection
random_rows <- sample(nrow(df), 10)

cat("Checking 10 random rows:\n")
cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n")
cat("----|----------------|-----------|-------------|-----------------|------------|------------\n")

for(i in random_rows) {
  orig_5 <- df$ehiDGEN_5_mean[i]
  std_5 <- df$stdDGEN_5[i]
  calc_5 <- (orig_5 - mean_DGEN_5) / sd_DGEN_5

  orig_10 <- df$ehiDGEN_10_mean[i]
  std_10 <- df$stdDGEN_10[i]
  calc_10 <- (orig_10 - mean_DGEN_10) / sd_DGEN_10

  cat(sprintf("%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n",
              i, orig_5, std_5, calc_5, orig_10, std_10, calc_10))
}

cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n")
cat("----|------------------|---------|-------------|-------------------|----------|------------\n")

for(i in random_rows) {
  orig_5 <- df$ehi5_global_mean[i]
  std_5 <- df$stdDS_5[i]
  calc_5 <- (orig_5 - mean_DS_5) / sd_DS_5

  orig_10 <- df$ehi10_global_mean[i]
  std_10 <- df$stdDS_10[i]
  calc_10 <- (orig_10 - mean_DS_10) / sd_DS_10

  cat(sprintf("%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n",
              i, orig_5, std_5, calc_5, orig_10, std_10, calc_10))
}

# Show the final stdEHI_mean for these rows
cat("\nRow | stdEHI_mean | Manual calc\n")
cat("----|-------------|------------\n")
for(i in random_rows) {
  # FIX: the original assigned a corrupted string of hard-coded numeric
  # literals here ("-0.042564413 -0.158849227 ..."), which collapsed into a
  # single arithmetic expression (their sum) that ignored row i entirely,
  # while the intended mean(...) sat on the next line with its result
  # discarded. The manual check must recompute the row mean of the four
  # standardized scores, mirroring the rowMeans(..., na.rm = TRUE) above.
  manual_mean <- mean(c(df$stdDGEN_5[i], df$stdDGEN_10[i],
                        df$stdDS_5[i], df$stdDS_10[i]), na.rm = TRUE)
  cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean))
}

# Write to CSV
write.csv(df, "eohi2.csv", row.names = FALSE)
|
||||||
24
eohi2/descriptive_statistics.csv
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
|
||||||
|
"ehiDGEN_5_Pref",1,489,0.06339,1.9954,0,0.03308,1.4826,-9,9,18,0.27576,4.00366,0.09024
|
||||||
|
"ehiDGEN_5_Pers",2,489,0.09816,1.85725,0,0.1145,1.4826,-9,7,16,-0.18711,3.23028,0.08399
|
||||||
|
"ehiDGEN_5_Val",3,489,0.02454,2.16042,0,0.06361,1.4826,-9,10,19,0.1765,4.44483,0.0977
|
||||||
|
"ehiDGEN_10_Pref",4,489,0.20245,2.29618,0,0.18575,1.4826,-10,9,19,0.01558,3.90242,0.10384
|
||||||
|
"ehiDGEN_10_Pers",5,489,0.29448,2.34328,0,0.24936,1.4826,-9,10,19,0.06997,2.70639,0.10597
|
||||||
|
"ehiDGEN_10_Val",6,489,0.40491,2.32319,0,0.32061,1.4826,-8,10,18,0.37025,2.73426,0.10506
|
||||||
|
"ehi5_pref_MEAN",7,489,0.07607,0.64562,0,0.05954,0.29652,-4.4,4,8.4,0.13291,10.10075,0.0292
|
||||||
|
"ehi5_pers_MEAN",8,489,0.07454,0.74495,0,0.05267,0.59304,-2.4,3.6,6,0.58141,3.07505,0.03369
|
||||||
|
"ehi5_val_MEAN",9,489,0.05194,0.58562,0,0.02748,0.29652,-2.6,3,5.6,0.62482,5.46128,0.02648
|
||||||
|
"ehi10_pref_MEAN",10,489,0.10348,0.69093,0,0.09059,0.59304,-3.6,3.6,7.2,0.02685,4.20962,0.03124
|
||||||
|
"ehi10_pers_MEAN",11,489,0.12168,0.75889,0,0.09389,0.59304,-3.6,3.8,7.4,0.14345,3.87988,0.03432
|
||||||
|
"ehi10_val_MEAN",12,489,0.14397,0.71372,0,0.08193,0.29652,-2.4,4,6.4,1.25911,5.60017,0.03228
|
||||||
|
"ehi5.10_pref_MEAN",13,489,0.13538,0.70635,0,0.12214,0.29652,-5.2,2.6,7.8,-0.87659,8.98815,0.03194
|
||||||
|
"ehi5.10_pers_MEAN",14,489,0.18773,0.77513,0,0.16489,0.59304,-3.6,3.4,7,0.18903,2.50653,0.03505
|
||||||
|
"ehi5.10_val_MEAN",15,489,0.12229,0.73027,0,0.08193,0.29652,-2.8,3.6,6.4,0.68131,4.79817,0.03302
|
||||||
|
"ehiDGEN_5_mean",16,489,0.06203,1.45735,0,0.06361,0.9884,-7.66667,5.66667,13.33333,-0.33245,3.67417,0.0659
|
||||||
|
"ehiDGEN_10_mean",17,489,0.30061,1.89245,0,0.28329,0.9884,-8,9,17,0.01127,3.38489,0.08558
|
||||||
|
"ehi5_global_mean",18,489,0.06752,0.46201,0,0.0486,0.29652,-1.93333,3.33333,5.26667,0.93595,7.08224,0.02089
|
||||||
|
"ehi10_global_mean",19,489,0.12304,0.52522,0,0.08494,0.29652,-2.46667,2.33333,4.8,0.55734,3.94213,0.02375
|
||||||
|
"ehi5.10_global_mean",20,489,0.14847,0.50536,0.06667,0.12231,0.29652,-1.93333,2.8,4.73333,0.67593,4.05776,0.02285
|
||||||
|
"aot_total",21,489,0.20475,0.50227,0.125,0.18861,0.37065,-2,1.75,3.75,0.06455,1.92235,0.02271
|
||||||
|
"crt_correct",22,489,0.26858,0.35082,0,0.21204,0,0,1,1,0.97116,-0.45636,0.01586
|
||||||
|
"crt_int",23,489,0.67212,0.3564,0.66667,0.71416,0.4942,0,1,1,-0.65482,-0.92392,0.01612
|
||||||
|
15
eohi2/descriptive_statistics_domain_general_vars.csv
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
|
||||||
|
"DGEN_past_5_mean",1,489,3.70279,2.64159,3.33333,3.57422,2.9652,0,10,10,0.33519,-0.92598,0.11946
|
||||||
|
"DGEN_past_10_mean",2,489,4.12747,2.69248,4,4.07209,3.4594,0,10,10,0.15303,-1.06997,0.12176
|
||||||
|
"DGEN_fut_5_mean",3,489,3.64076,2.72097,3,3.47498,2.9652,0,10,10,0.45879,-0.91272,0.12305
|
||||||
|
"DGEN_fut_10_mean",4,489,3.82686,2.67605,3.33333,3.6743,2.9652,0,10,10,0.41341,-0.85159,0.12101
|
||||||
|
"DGEN_past_5.10_mean",5,489,1.10907,1.14357,0.66667,0.93384,0.9884,0,6.66667,6.66667,1.38388,2.20547,0.05171
|
||||||
|
"DGEN_fut_5.10_mean",6,489,1.03272,1.16269,0.66667,0.83885,0.9884,0,7.33333,7.33333,1.82686,4.61137,0.05258
|
||||||
|
"DGENpast_global_mean",7,489,2.97978,1.86338,3.11111,2.96409,2.30627,0,6.66667,6.66667,0.05122,-1.1105,0.08427
|
||||||
|
"DGENfut_global_mean",8,489,2.83345,1.86526,2.66667,2.76788,2.30627,0,6.66667,6.66667,0.25867,-1.05797,0.08435
|
||||||
|
"DGEN_5_global_mean",9,489,3.67178,2.58067,3.16667,3.53859,2.9652,0,10,10,0.38068,-0.90478,0.1167
|
||||||
|
"DGEN_10_global_mean",10,489,3.97716,2.51197,3.83333,3.89992,2.9652,0,10,10,0.23861,-0.92036,0.1136
|
||||||
|
"DGEN_5.10_global_mean",11,489,1.07089,0.95646,0.83333,0.95335,0.9884,0,5,5,1.13335,1.28416,0.04325
|
||||||
|
"aot_total",12,489,0.20475,0.50227,0.125,0.18861,0.37065,-2,1.75,3.75,0.06455,1.92235,0.02271
|
||||||
|
"crt_correct",13,489,0.26858,0.35082,0,0.21204,0,0,1,1,0.97116,-0.45636,0.01586
|
||||||
|
"crt_int",14,489,0.67212,0.3564,0.66667,0.71416,0.4942,0,1,1,-0.65482,-0.92392,0.01612
|
||||||
|