eohi3-updates #3

Merged
ira merged 3 commits from eohi3-updates into master 2026-01-26 16:30:11 -05:00
178 changed files with 24078 additions and 0 deletions
Showing only changes of commit 4c3d96a61b - Show all commits

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.history

55
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,55 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        // Debug the whole open workspace with the R-Debugger extension.
        {
            "type": "R-Debugger",
            "name": "Launch R-Workspace",
            "request": "launch",
            "debugMode": "workspace",
            "workingDirectory": "${workspaceFolder}",
            "splitOverwrittenOutput": true
        },
        // Debug only the R file currently open in the editor (${file}),
        // without stopping on the first line.
        {
            "type": "R-Debugger",
            "name": "Debug R-File",
            "request": "launch",
            "debugMode": "file",
            "workingDirectory": "${workspaceFolder}",
            "file": "${file}",
            "splitOverwrittenOutput": true,
            "stopOnEntry": false
        },
        // Source the active file and debug its main() function only;
        // global debugging is disabled for this configuration.
        {
            "type": "R-Debugger",
            "name": "Debug R-Function",
            "request": "launch",
            "debugMode": "function",
            "workingDirectory": "${workspaceFolder}",
            "file": "${file}",
            "mainFunction": "main",
            "allowGlobalDebugging": false,
            "splitOverwrittenOutput": true
        },
        // Debug the workspace as an R package: loads the package at "."
        // and exposes package scopes in the debugger.
        {
            "type": "R-Debugger",
            "name": "Debug R-Package",
            "request": "launch",
            "debugMode": "workspace",
            "workingDirectory": "${workspaceFolder}",
            "includePackageScopes": true,
            "loadPackages": [
                "."
            ],
            "splitOverwrittenOutput": true
        },
        // Attach the debugger to an already running R process.
        {
            "type": "R-Debugger",
            "request": "attach",
            "name": "Attach to R process",
            "splitOverwrittenOutput": true
        }
    ]
}

BIN
anova tables.pptx Normal file

Binary file not shown.

Binary file not shown.

118
eohi1/BS_means.vb Normal file
View File

@ -0,0 +1,118 @@
Option Explicit
Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Locate headerName in row 1 of ws; returns its column number, or 0 when absent.
    Dim rightmost As Long
    rightmost = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    Dim hit As Variant
    hit = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, rightmost)), 0)
    If IsError(hit) Then
        ' Application.Match yields an error Variant when the header is missing.
        GetColIndex = 0
    Else
        GetColIndex = CLng(hit)
    End If
End Function
Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
    ' Collect the sheet column index of every header that actually exists,
    ' preserving order and skipping absent headers. Returns an empty Variant
    ' array when none of the headers are present.
    Dim found() As Long
    ReDim found(0 To UBound(headers))
    Dim hits As Long
    hits = 0
    Dim i As Long
    For i = LBound(headers) To UBound(headers)
        Dim idx As Long
        idx = GetColIndex(CStr(headers(i)), ws)
        If idx > 0 Then
            found(hits) = idx
            hits = hits + 1
        End If
    Next i
    If hits = 0 Then
        BuildPresentColArray = Array()
    Else
        ' Shrink in place instead of copying element by element.
        ReDim Preserve found(0 To hits - 1)
        BuildPresentColArray = found
    End If
End Function
Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
    ' Average the numeric cells of one row across the given columns.
    ' Error and non-numeric cells are skipped; returns #N/A when no numeric
    ' value is found (including when colIndexes is not an array).
    Dim total As Double
    Dim n As Long
    If IsArray(colIndexes) Then
        Dim k As Long
        For k = LBound(colIndexes) To UBound(colIndexes)
            Dim cellVal As Variant
            cellVal = ws.Cells(rowIndex, CLng(colIndexes(k))).Value
            ' Nested Ifs on purpose: VBA's And does not short-circuit, so
            ' IsNumeric must not be evaluated on an error value.
            If Not IsError(cellVal) Then
                If IsNumeric(cellVal) Then
                    total = total + CDbl(cellVal)
                    n = n + 1
                End If
            End If
        Next k
    End If
    If n = 0 Then
        MeanOfRow = CVErr(xlErrNA)
    Else
        MeanOfRow = total / n
    End If
End Function
Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
    ' Return the column holding headerName, creating it just after the last
    ' used header column when it does not exist yet.
    Dim col As Long
    col = GetColIndex(headerName, ws)
    If col = 0 Then
        col = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column + 1
        ws.Cells(1, col).Value = headerName
    End If
    EnsureOutputColumn = col
End Function
Sub BS_Means()
    ' Write per-participant Brier-score means into the first sheet:
    ' bs_28 (all 28 items), bs_easy (14 easy items), bs_hard (14 hard items).
    ' Output columns are created when missing; rows start at 2 (row 1 = headers).
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)

    Dim all28 As Variant
    all28 = Array("lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
                  "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
                  "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
                  "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim easy14 As Variant
    easy14 = Array("lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
                   "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy")
    Dim hard14 As Variant
    hard14 = Array("spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
                   "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")

    ' Resolve which of the item columns actually exist on the sheet.
    Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
    colsAll = BuildPresentColArray(all28, ws)
    colsEasy = BuildPresentColArray(easy14, ws)
    colsHard = BuildPresentColArray(hard14, ws)

    ' Ensure the three output columns exist (created in this order when absent).
    Dim outAll As Long, outEasy As Long, outHard As Long
    outAll = EnsureOutputColumn(ws, "bs_28")
    outEasy = EnsureOutputColumn(ws, "bs_easy")
    outHard = EnsureOutputColumn(ws, "bs_hard")

    ' Data extent is taken from column A.
    Dim lastRow As Long
    lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    Dim r As Long
    For r = 2 To lastRow
        ws.Cells(r, outAll).Value = MeanOfRow(ws, r, colsAll)
        ws.Cells(r, outEasy).Value = MeanOfRow(ws, r, colsEasy)
        ws.Cells(r, outHard).Value = MeanOfRow(ws, r, colsHard)
    Next r
End Sub

View File

@ -0,0 +1,25 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

# Column groups feeding the two composite scores.
npast_cols <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")
nfut_cols <- c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Row-wise means across the four domain scores, ignoring missing values.
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, npast_cols], na.rm = TRUE)
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, nfut_cols], na.rm = TRUE)

# NOTE(review): this overwrites exp1.csv in place — keep a backup of the raw file.
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals.
# print() is explicit so the summaries also appear when this file is run via
# source(), where top-level values are not auto-printed.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first rows to verify the calculations. head() is safe when the
# file has fewer than 5 rows, unlike exp1_data[1:5, ].
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c(npast_cols, "NPast_mean_total", nfut_cols, "NFut_mean_total")], 5))

View File

@ -0,0 +1,17 @@
,eohiDGEN_mean,ehi_global_mean,sex_dummy,demo_age_1,edu_num,AOT_total,CRT_correct,CRT_int,bs_28,bs_easy,bs_hard,cal_selfActual
ehi_global_mean,0.31***,,,,,,,,,,,
sex_dummy,0.0086,0.044,,,,,,,,,,
demo_age_1,-0.12***,-0.19***,0.0094,,,,,,,,,
edu_num,-0.0081,0.009,-0.062*,-0.023,,,,,,,,
AOT_total,0.11***,0.086***,-0.042,0.11,0.035,,,,,,,
CRT_correct,0.068*,0.051,-0.15***,-0.049,0.13***,0.26***,,,,,,
CRT_int,-0.058,-0.057,0.16***,0.071*,-0.12***,-0.21***,-0.87***,,,,,
bs_28,-0.01,-0.006803247,-0.0022,-0.071*,-0.065*,-0.27***,-0.25***,0.23***,,,,
bs_easy,-0.0088,0.014,-0.037,-0.30***,-0.009,-0.31***,-0.094***,0.047,0.61***,,,
bs_hard,-0.015,-0.024,0.022,0.095**,-0.085**,-0.15***,-0.24***,0.25***,0.87***,0.18***,,
cal_selfActual,0.015,-0.051,-0.15***,0.18***,0.12***,-0.041,0.047,-0.053,0.31***,0.05315529,0.35***,
cal_global,0.019,-0.0047,-0.078*,0.031,0.021,-0.16***,-0.15***,0.13***,0.76***,0.37***,0.72***,0.55***
,,,,,,,,,,,,
*p<0.05,,,,,,,,,,,,
**p<0.01,,,,,,,,,,,,
***p<0.001,,,,,,,,,,,,
1 eohiDGEN_mean ehi_global_mean sex_dummy demo_age_1 edu_num AOT_total CRT_correct CRT_int bs_28 bs_easy bs_hard cal_selfActual
1 eohiDGEN_mean ehi_global_mean sex_dummy demo_age_1 edu_num AOT_total CRT_correct CRT_int bs_28 bs_easy bs_hard cal_selfActual
2 ehi_global_mean 0.31***
3 sex_dummy 0.0086 0.044
4 demo_age_1 -0.12*** -0.19*** 0.0094
5 edu_num -0.0081 0.009 -0.062* -0.023
6 AOT_total 0.11*** 0.086*** -0.042 0.11 0.035
7 CRT_correct 0.068* 0.051 -0.15*** -0.049 0.13*** 0.26***
8 CRT_int -0.058 -0.057 0.16*** 0.071* -0.12*** -0.21*** -0.87***
9 bs_28 -0.01 -0.006803247 -0.0022 -0.071* -0.065* -0.27*** -0.25*** 0.23***
10 bs_easy -0.0088 0.014 -0.037 -0.30*** -0.009 -0.31*** -0.094*** 0.047 0.61***
11 bs_hard -0.015 -0.024 0.022 0.095** -0.085** -0.15*** -0.24*** 0.25*** 0.87*** 0.18***
12 cal_selfActual 0.015 -0.051 -0.15*** 0.18*** 0.12*** -0.041 0.047 -0.053 0.31*** 0.05315529 0.35***
13 cal_global 0.019 -0.0047 -0.078* 0.031 0.021 -0.16*** -0.15*** 0.13*** 0.76*** 0.37*** 0.72*** 0.55***
14
15 *p<0.05
16 **p<0.01
17 ***p<0.001

View File

@ -0,0 +1,18 @@
,eohiDGEN_mean,ehi_global_mean,sex_dummy,demo_age_1,edu_num,AOT_total,CRT_correct,CRT_int,bs_28,bs_easy,bs_hard,cal_selfActual
ehi_global_mean,3.36865E-25,,,,,,,,,,,
sex_dummy,0.778486413,0.154074007,,,,,,,,,,
demo_age_1,5.04502E-05,1.68576E-10,0.759436702,,,,,,,,,
edu_num,0.79326152,0.768960745,0.043945088,0.454222403,,,,,,,,
AOT_total,0.000229114,0.004867793,0.168503786,0.000314432,0.259313829,,,,,,,
CRT_correct,0.027291283,0.099344285,4.65922E-07,0.108342391,3.46509E-05,4.3812E-18,,,,,,
CRT_int,0.05842806,0.065472289,1.53701E-07,0.021280157,6.99182E-05,3.99893E-12,0,,,,,
bs_28,0.735913365,0.824825013,0.94333124,0.020232959,0.035132191,1.09048E-18,1.61283E-16,1.17159E-13,,,,
bs_easy,0.775943639,0.653590097,0.22951034,3.86076E-23,0.77008947,1.15661E-24,0.002210577,0.123278134,6.2215E-108,,,
bs_hard,0.622307404,0.443340859,0.469287124,0.001914319,0.005383608,7.67838E-07,9.71752E-16,2.00781E-16,0,2.79128E-09,,
cal_selfActual,0.623530057,0.098794041,8.23097E-07,2.62161E-09,0.000133768,0.183121705,0.125377421,0.088656149,2.63226E-25,0.083521419,5.04214E-32,
cal_global,0.543555525,0.879493859,0.011276459,0.318757547,0.487961367,3.0437E-07,8.00711E-07,1.20286E-05,1.4315E-202,3.1393E-35,1.2757E-171,4.92187E-86
,,,,,,,,,,,,
,,,,,,,,,,,,
,p<0.05,,,,,,,,,,,
,p<0.01,,,,,,,,,,,
,p<0.001,,,,,,,,,,,
1 eohiDGEN_mean ehi_global_mean sex_dummy demo_age_1 edu_num AOT_total CRT_correct CRT_int bs_28 bs_easy bs_hard cal_selfActual
1 eohiDGEN_mean ehi_global_mean sex_dummy demo_age_1 edu_num AOT_total CRT_correct CRT_int bs_28 bs_easy bs_hard cal_selfActual
2 ehi_global_mean 3.36865E-25
3 sex_dummy 0.778486413 0.154074007
4 demo_age_1 5.04502E-05 1.68576E-10 0.759436702
5 edu_num 0.79326152 0.768960745 0.043945088 0.454222403
6 AOT_total 0.000229114 0.004867793 0.168503786 0.000314432 0.259313829
7 CRT_correct 0.027291283 0.099344285 4.65922E-07 0.108342391 3.46509E-05 4.3812E-18
8 CRT_int 0.05842806 0.065472289 1.53701E-07 0.021280157 6.99182E-05 3.99893E-12 0
9 bs_28 0.735913365 0.824825013 0.94333124 0.020232959 0.035132191 1.09048E-18 1.61283E-16 1.17159E-13
10 bs_easy 0.775943639 0.653590097 0.22951034 3.86076E-23 0.77008947 1.15661E-24 0.002210577 0.123278134 6.2215E-108
11 bs_hard 0.622307404 0.443340859 0.469287124 0.001914319 0.005383608 7.67838E-07 9.71752E-16 2.00781E-16 0 2.79128E-09
12 cal_selfActual 0.623530057 0.098794041 8.23097E-07 2.62161E-09 0.000133768 0.183121705 0.125377421 0.088656149 2.63226E-25 0.083521419 5.04214E-32
13 cal_global 0.543555525 0.879493859 0.011276459 0.318757547 0.487961367 3.0437E-07 8.00711E-07 1.20286E-05 1.4315E-202 3.1393E-35 1.2757E-171 4.92187E-86
14
15
16 p<0.05
17 p<0.01
18 p<0.001

View File

@ -0,0 +1,65 @@
<html><head><title>EHI Reliability Analysis</title></head><body><h1>EHI Reliability Analysis</h1><h2>Two-Item Reliability Summary</h2><p>Pearson r: 0.34049 (95% CI: [0.28622, 0.39258])</p><p>Spearman r: 0.31052 (95% CI: [0.25516, 0.36386])</p><p>Spearman–Brown / Cronbach's α: 0.50801 (95% CI: [0.44506, 0.56382])</p><h2>Cronbach's Alpha</h2><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> raw_alpha </th>
<th style="text-align:right;"> std.alpha </th>
<th style="text-align:right;"> G6(smc) </th>
<th style="text-align:right;"> average_r </th>
<th style="text-align:right;"> S/N </th>
<th style="text-align:right;"> ase </th>
<th style="text-align:right;"> mean </th>
<th style="text-align:right;"> sd </th>
<th style="text-align:right;"> median_r </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> </td>
<td style="text-align:right;"> 0.3078741 </td>
<td style="text-align:right;"> 0.5080098 </td>
<td style="text-align:right;"> 0.3404914 </td>
<td style="text-align:right;"> 0.3404914 </td>
<td style="text-align:right;"> 1.032561 </td>
<td style="text-align:right;"> 0.0216938 </td>
<td style="text-align:right;"> 0.2755268 </td>
<td style="text-align:right;"> 0.98504 </td>
<td style="text-align:right;"> 0.3404914 </td>
</tr>
</tbody>
</table><h2>Split-Half Reliability</h2><p>Maximum split half reliability: 0.50801</p><h2>Item-Level Statistics</h2><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> n </th>
<th style="text-align:right;"> raw.r </th>
<th style="text-align:right;"> std.r </th>
<th style="text-align:right;"> r.cor </th>
<th style="text-align:right;"> r.drop </th>
<th style="text-align:right;"> mean </th>
<th style="text-align:right;"> sd </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> eohiDGEN_mean </td>
<td style="text-align:right;"> 1063 </td>
<td style="text-align:right;"> 0.9706327 </td>
<td style="text-align:right;"> 0.8186853 </td>
<td style="text-align:right;"> 0.4777163 </td>
<td style="text-align:right;"> 0.3404914 </td>
<td style="text-align:right;"> 0.4148824 </td>
<td style="text-align:right;"> 1.740598 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi_global_mean </td>
<td style="text-align:right;"> 1063 </td>
<td style="text-align:right;"> 0.5566839 </td>
<td style="text-align:right;"> 0.8186853 </td>
<td style="text-align:right;"> 0.4777163 </td>
<td style="text-align:right;"> 0.3404914 </td>
<td style="text-align:right;"> 0.1361712 </td>
<td style="text-align:right;"> 0.504053 </td>
</tr>
</tbody>
</table></body></html>

BIN
eohi1/Rplots.pdf Normal file

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

BIN
eohi1/age_DGEN_plot.png Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 444 KiB

After

Width:  |  Height:  |  Size: 444 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

BIN
eohi1/age_domain_plot.png Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 530 KiB

After

Width:  |  Height:  |  Size: 530 KiB

View File

@ -0,0 +1,164 @@
# Assumption Checks Before Cronbach's Alpha Analysis
# Run this BEFORE the main reliability analysis
library(psych)
library(corrplot)
library(ggplot2)
# Read the data (exp1.csv must be in the current working directory)
data <- read.csv("exp1.csv")
# Define scale variables: each "past" subscale below lists its five item columns.
# Preferences subscale
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
"NPastDiff_pref_nap", "NPastDiff_pref_travel")
# Personality subscale
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
"NPastDiff_pers_anxious", "NPastDiff_pers_complex")
# Values subscale
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
"NPastDiff_val_performance", "NPastDiff_val_justice")
# Life-satisfaction subscale
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
"NPastDiff_life_important", "NPastDiff_life_change")
# Function to check assumptions for a scale
# Pre-analysis assumption checks for Cronbach's alpha on one scale.
#
# Args:
#   data       data frame holding the raw item responses
#   var_names  character vector of item column names forming the scale
#   scale_name label used in the printed report
#
# Prints a numbered report (sample size, missing data, normality,
# inter-item correlations, item variances, multivariate outliers, and an
# overall recommendation). Returns FALSE when fewer than 30 complete cases
# are available; TRUE otherwise.
check_assumptions <- function(data, var_names, scale_name) {
  # BUG FIX: the original used "="*60, which is not valid R (there is no
  # string-repetition operator) and errored on every call. strrep() is the
  # correct base-R way to build the divider line.
  divider <- strrep("=", 60)
  cat("\n", divider, "\n")
  cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
  cat(divider, "\n")
  # Get scale data
  scale_data <- data[, var_names]
  # 1. Sample size check
  complete_cases <- sum(complete.cases(scale_data))
  cat("1. SAMPLE SIZE CHECK:\n")
  cat(" Total participants:", nrow(data), "\n")
  cat(" Complete cases:", complete_cases, "\n")
  cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
  if(complete_cases < 30) {
    cat(" WARNING: Sample size too small for reliable alpha estimates\n")
    return(FALSE)
  }
  # 2. Missing data check
  cat("\n2. MISSING DATA CHECK:\n")
  missing_counts <- colSums(is.na(scale_data))
  missing_pct <- round(missing_counts / nrow(data) * 100, 2)
  cat(" Missing data by item:\n")
  for(i in 1:length(var_names)) {
    cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
  }
  max_missing <- max(missing_pct)
  cat(" Maximum missing:", max_missing, "%\n")
  cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
  # 3. Use only complete cases for remaining checks
  complete_data <- scale_data[complete.cases(scale_data), ]
  # 4. Normality check (Shapiro-Wilk test on first item as example)
  cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
  if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
    shapiro_result <- shapiro.test(complete_data[, 1])
    cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
    cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
  } else {
    cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
  }
  # 5. Inter-item correlations check
  cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
  cor_matrix <- cor(complete_data)
  # Keep each pairwise correlation once: blank the lower triangle and diagonal.
  cor_matrix[lower.tri(cor_matrix)] <- NA
  diag(cor_matrix) <- NA
  cors <- as.vector(cor_matrix)
  cors <- cors[!is.na(cors)]
  positive_cors <- sum(cors > 0)
  strong_cors <- sum(cors > 0.30)
  negative_cors <- sum(cors < 0)
  cat(" Total correlations:", length(cors), "\n")
  cat(" Positive correlations:", positive_cors, "\n")
  cat(" Strong correlations (>0.30):", strong_cors, "\n")
  cat(" Negative correlations:", negative_cors, "\n")
  cat(" Mean correlation:", round(mean(cors), 4), "\n")
  cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
  if(negative_cors > 0) {
    cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
  }
  if(strong_cors / length(cors) < 0.5) {
    cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
  }
  # 6. Item variance check
  cat("\n5. ITEM VARIANCE CHECK:\n")
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat(" Item variances:", round(item_vars, 4), "\n")
  cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
  cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
  # 7. Outlier check
  cat("\n6. OUTLIER CHECK:\n")
  # Check for multivariate outliers using Mahalanobis distance
  # (needs more rows than columns for an invertible covariance matrix)
  if(nrow(complete_data) > ncol(complete_data)) {
    mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
    outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
    outliers <- sum(mahal_dist > outlier_threshold)
    cat(" Multivariate outliers (p<0.001):", outliers, "\n")
    cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
  }
  # 8. Summary recommendation: count how many hard criteria were violated
  cat("\n7. OVERALL RECOMMENDATION:\n")
  issues <- 0
  if(complete_cases < 30) issues <- issues + 1
  if(max_missing >= 20) issues <- issues + 1
  if(negative_cors > 0) issues <- issues + 1
  if(var_ratio >= 4) issues <- issues + 1
  if(issues == 0) {
    cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
  } else if(issues <= 2) {
    cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
  } else {
    cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
  }
  return(TRUE)
}
# Run the checks on every "past" scale and one "future" scale.
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
cat("\nData: exp1.csv")
cat("\nTotal sample size:", nrow(data))
check_assumptions(data, past_pref_vars, "Past Preferences")
check_assumptions(data, past_pers_vars, "Past Personality")
check_assumptions(data, past_val_vars, "Past Values")
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
# Quick check of future scales (you can expand this)
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
"NFutDiff_pref_nap", "NFutDiff_pref_travel")
check_assumptions(data, fut_pref_vars, "Future Preferences")
# BUG FIX: "="*60 is not valid R (no string-repetition operator); strrep()
# builds the divider line instead.
divider <- strrep("=", 60)
cat("\n", divider, "\n")
cat("GENERAL GUIDELINES:\n")
cat(divider, "\n")
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
cat("⚠️ If some assumptions are violated, interpret with caution\n")
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
cat(" - Omega coefficient (more robust to violations)\n")
cat(" - Split-half reliability\n")
cat(" - Test-retest reliability\n")
cat(" - Factor analysis to check dimensionality\n")

141
eohi1/brierVARS.vb Normal file
View File

@ -0,0 +1,141 @@
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Return the 1-based column number of headerName in row 1 of ws, or 0 when
    ' the header is not found.
    ' Application.Match returns an error Variant rather than raising, so the
    ' original On Error Resume Next guard was redundant and has been removed —
    ' this also matches GetColIndex in BS_means.vb.
    Dim lastCol As Long
    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    Dim m As Variant
    m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
    If IsError(m) Then
        GetCol = 0
    Else
        GetCol = CLng(m)
    End If
End Function
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    ' Map a cell value to True/False; Null when the value is indeterminate
    ' (error cells, empty cells, or unrecognised text).
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        Dim token As String
        token = Trim$(UCase$(CStr(v)))
        If token = "TRUE" Or token = "T" Or token = "1" Or token = "YES" Or token = "Y" Then
            NormalizeTruth = True
        ElseIf token = "FALSE" Or token = "F" Or token = "0" Or token = "NO" Or token = "N" Then
            NormalizeTruth = False
        Else
            NormalizeTruth = Null
        End If
    End If
End Function
Private Function NormalizeProb01(ByVal v As Variant) As Double
    ' Coerce a confidence entry to a probability in [0,1]; returns -1 when the
    ' value is unusable (error, empty, or non-numeric).
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If
    ' Percent-formatted text such as "75%" is divided by 100 directly.
    Dim txt As String
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        Dim stripped As String
        stripped = Replace$(txt, "%", "")
        If IsNumeric(stripped) Then
            NormalizeProb01 = CDbl(stripped) / 100#
            Exit Function
        End If
    End If
    ' Plain numbers above 1 are assumed to be percentages (e.g. 55 -> 0.55).
    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
    ElseIf CDbl(v) > 1# Then
        NormalizeProb01 = CDbl(v) / 100#
    Else
        NormalizeProb01 = CDbl(v)
    End If
End Function
Private Sub ApplyBrierScores(ByVal ws As Worksheet, ByVal pairVars As Variant, ByVal targetHeaders As Variant, ByVal rowCount As Long, ByVal statementIsTrue As Boolean)
    ' For each (answer, confidence) column pair in pairVars, write per-row
    ' Brier scores into the target column whose header shares the same item
    ' prefix (the token before the first "_").
    ' The reported probability that the statement is true is conf when the
    ' participant answered True, otherwise 1 - conf; the Brier score is the
    ' squared deviation of that probability from the actual outcome (1 for a
    ' true statement, 0 for a false one).
    Dim outcome As Double
    outcome = IIf(statementIsTrue, 1#, 0#)
    Dim i As Long
    For i = 0 To UBound(pairVars) Step 2
        Dim srcVar As String
        srcVar = pairVars(i)
        Dim keyPrefix As String
        keyPrefix = Left$(srcVar, InStr(srcVar, "_") - 1)
        Dim colAnswer As Long, colConf As Long
        colAnswer = GetCol(CStr(pairVars(i)), ws)
        colConf = GetCol(CStr(pairVars(i + 1)), ws)
        If colAnswer > 0 And colConf > 0 Then
            ' The target column depends only on the item prefix, so resolve it
            ' once per item instead of once per row (hoisted loop-invariant).
            Dim colTarget As Long
            colTarget = 0
            Dim j As Long
            For j = 0 To UBound(targetHeaders)
                If Left$(targetHeaders(j), InStr(targetHeaders(j), "_") - 1) = keyPrefix Then
                    colTarget = GetCol(CStr(targetHeaders(j)), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                Dim r As Long
                For r = 2 To rowCount
                    ' Read as Variant so error cells reach NormalizeTruth and
                    ' produce #N/A instead of a type-mismatch crash.
                    Dim answerVal As Variant
                    answerVal = ws.Cells(r, colAnswer).Value
                    Dim conf As Double
                    conf = NormalizeProb01(ws.Cells(r, colConf).Value)
                    Dim truth As Variant
                    truth = NormalizeTruth(answerVal)
                    Dim result As Variant
                    If IsNull(truth) Or conf < 0 Then
                        result = CVErr(xlErrNA)
                    Else
                        Dim pTrue As Double
                        pTrue = IIf(truth = True, conf, 1# - conf)
                        result = (pTrue - outcome) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub
Sub brierVARS()
    ' Compute Brier scores for every item on the first sheet.
    ' false_vars / true_vars alternate answer and confidence column headers for
    ' factually false and factually true statements; results go into the
    ' matching target_headers column. The two near-identical loops of the
    ' original are factored into ApplyBrierScores.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    ' Data extent is taken from column A; rows start at 2 (row 1 = headers).
    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ApplyBrierScores ws, false_vars, target_headers, rowCount, False
    ApplyBrierScores ws, true_vars, target_headers, rowCount, True
End Sub

57
eohi1/corr_bs_all.csv Normal file
View File

@ -0,0 +1,57 @@
group,var_x,var_y,n,r,p
BS_vs_Cal,bs_28,cal_global,1063,0.761895605,2.37E-202
BS_vs_Cal,bs_easy,cal_global,1063,0.367329651,2.67E-35
BS_vs_Cal,bs_hard,cal_global,1063,0.721638057,1.16E-171
BS_vs_Cal,bs_28,cal_selfActual,1063,0.312906942,1.41E-25
BS_vs_Cal,bs_easy,cal_selfActual,1063,0.054307733,0.07675131
BS_vs_Cal,bs_hard,cal_selfActual,1063,0.351630149,2.70E-32
BS_vs_EOHI,bs_28,ehi_global_mean,1063,-0.005868845,0.848429656
BS_vs_EOHI,bs_easy,ehi_global_mean,1063,0.014251763,0.642549738
BS_vs_EOHI,bs_hard,ehi_global_mean,1063,-0.022683413,0.460035927
Cal_vs_EOHI,cal_global,ehi_global_mean,1063,-0.004912877,0.872888653
Cal_vs_EOHI,cal_selfActual,ehi_global_mean,1063,-0.049727552,0.105147189
BS_vs_EOHI,bs_28,ehi_life_mean,1063,-0.007684801,0.802384957
BS_vs_EOHI,bs_easy,ehi_life_mean,1063,-0.024372981,0.427293321
BS_vs_EOHI,bs_hard,ehi_life_mean,1063,-0.001049006,0.972748582
Cal_vs_EOHI,cal_global,ehi_life_mean,1063,-0.002906394,0.924594545
Cal_vs_EOHI,cal_selfActual,ehi_life_mean,1063,0.028247567,0.357533744
BS_vs_EOHI,bs_28,ehi_pers_mean,1063,-0.00684928,0.823495332
BS_vs_EOHI,bs_easy,ehi_pers_mean,1063,-0.009992311,0.744870091
BS_vs_EOHI,bs_hard,ehi_pers_mean,1063,-0.013361509,0.663459912
Cal_vs_EOHI,cal_global,ehi_pers_mean,1063,-0.009160631,0.765455012
Cal_vs_EOHI,cal_selfActual,ehi_pers_mean,1063,-0.082807345,0.006907147
BS_vs_EOHI,bs_28,ehi_pref_mean,1063,-0.009153426,0.765634093
BS_vs_EOHI,bs_easy,ehi_pref_mean,1063,0.038767348,0.20660862
BS_vs_EOHI,bs_hard,ehi_pref_mean,1063,-0.033018506,0.282127883
Cal_vs_EOHI,cal_global,ehi_pref_mean,1063,0.009717642,0.751649131
Cal_vs_EOHI,cal_selfActual,ehi_pref_mean,1063,-0.044228709,0.149576975
BS_vs_EOHI,bs_28,ehi_val_mean,1063,0.011477853,0.708558862
BS_vs_EOHI,bs_easy,ehi_val_mean,1063,0.048673437,0.11273791
BS_vs_EOHI,bs_hard,ehi_val_mean,1063,-0.008316877,0.786509127
Cal_vs_EOHI,cal_global,ehi_val_mean,1063,-0.024447239,0.425886059
Cal_vs_EOHI,cal_selfActual,ehi_val_mean,1063,-0.099629152,0.001143742
BS_vs_EOHI,bs_28,eohiDGEN_life,1063,-0.011893558,0.69851102
BS_vs_EOHI,bs_easy,eohiDGEN_life,1063,-0.087676604,0.004226705
BS_vs_EOHI,bs_hard,eohiDGEN_life,1063,0.036722132,0.231590633
Cal_vs_EOHI,cal_global,eohiDGEN_life,1063,0.004140068,0.892751642
Cal_vs_EOHI,cal_selfActual,eohiDGEN_life,1063,0.036877484,0.229620994
BS_vs_EOHI,bs_28,eohiDGEN_mean,1063,-0.008492209,0.782120675
BS_vs_EOHI,bs_easy,eohiDGEN_mean,1063,-0.007458252,0.808095459
BS_vs_EOHI,bs_hard,eohiDGEN_mean,1063,-0.013570736,0.658521255
Cal_vs_EOHI,cal_global,eohiDGEN_mean,1063,0.019739833,0.520291178
Cal_vs_EOHI,cal_selfActual,eohiDGEN_mean,1063,0.016902147,0.582001734
BS_vs_EOHI,bs_28,eohiDGEN_pers,1063,-0.013831247,0.652392826
BS_vs_EOHI,bs_easy,eohiDGEN_pers,1063,-0.013771822,0.653788727
BS_vs_EOHI,bs_hard,eohiDGEN_pers,1063,-0.013837654,0.652242395
Cal_vs_EOHI,cal_global,eohiDGEN_pers,1063,-0.010393397,0.735006441
Cal_vs_EOHI,cal_selfActual,eohiDGEN_pers,1063,-0.028354225,0.355720599
BS_vs_EOHI,bs_28,eohiDGEN_pref,1063,0.002305478,0.940152072
BS_vs_EOHI,bs_easy,eohiDGEN_pref,1063,0.014268945,0.642148877
BS_vs_EOHI,bs_hard,eohiDGEN_pref,1063,-0.006108935,0.842308668
Cal_vs_EOHI,cal_global,eohiDGEN_pref,1063,0.024379777,0.427164421
Cal_vs_EOHI,cal_selfActual,eohiDGEN_pref,1063,0.008387657,0.784736723
BS_vs_EOHI,bs_28,eohiDGEN_val,1063,-0.043411125,0.157255062
BS_vs_EOHI,bs_easy,eohiDGEN_val,1063,-0.058087693,0.058325548
BS_vs_EOHI,bs_hard,eohiDGEN_val,1063,-0.032241181,0.293618158
Cal_vs_EOHI,cal_global,eohiDGEN_val,1063,-0.032038903,0.296658895
Cal_vs_EOHI,cal_selfActual,eohiDGEN_val,1063,0.035471252,0.247887232
1 group var_x var_y n r p
1 group var_x var_y n r p
2 BS_vs_Cal bs_28 cal_global 1063 0.761895605 2.37E-202
3 BS_vs_Cal bs_easy cal_global 1063 0.367329651 2.67E-35
4 BS_vs_Cal bs_hard cal_global 1063 0.721638057 1.16E-171
5 BS_vs_Cal bs_28 cal_selfActual 1063 0.312906942 1.41E-25
6 BS_vs_Cal bs_easy cal_selfActual 1063 0.054307733 0.07675131
7 BS_vs_Cal bs_hard cal_selfActual 1063 0.351630149 2.70E-32
8 BS_vs_EOHI bs_28 ehi_global_mean 1063 -0.005868845 0.848429656
9 BS_vs_EOHI bs_easy ehi_global_mean 1063 0.014251763 0.642549738
10 BS_vs_EOHI bs_hard ehi_global_mean 1063 -0.022683413 0.460035927
11 Cal_vs_EOHI cal_global ehi_global_mean 1063 -0.004912877 0.872888653
12 Cal_vs_EOHI cal_selfActual ehi_global_mean 1063 -0.049727552 0.105147189
13 BS_vs_EOHI bs_28 ehi_life_mean 1063 -0.007684801 0.802384957
14 BS_vs_EOHI bs_easy ehi_life_mean 1063 -0.024372981 0.427293321
15 BS_vs_EOHI bs_hard ehi_life_mean 1063 -0.001049006 0.972748582
16 Cal_vs_EOHI cal_global ehi_life_mean 1063 -0.002906394 0.924594545
17 Cal_vs_EOHI cal_selfActual ehi_life_mean 1063 0.028247567 0.357533744
18 BS_vs_EOHI bs_28 ehi_pers_mean 1063 -0.00684928 0.823495332
19 BS_vs_EOHI bs_easy ehi_pers_mean 1063 -0.009992311 0.744870091
20 BS_vs_EOHI bs_hard ehi_pers_mean 1063 -0.013361509 0.663459912
21 Cal_vs_EOHI cal_global ehi_pers_mean 1063 -0.009160631 0.765455012
22 Cal_vs_EOHI cal_selfActual ehi_pers_mean 1063 -0.082807345 0.006907147
23 BS_vs_EOHI bs_28 ehi_pref_mean 1063 -0.009153426 0.765634093
24 BS_vs_EOHI bs_easy ehi_pref_mean 1063 0.038767348 0.20660862
25 BS_vs_EOHI bs_hard ehi_pref_mean 1063 -0.033018506 0.282127883
26 Cal_vs_EOHI cal_global ehi_pref_mean 1063 0.009717642 0.751649131
27 Cal_vs_EOHI cal_selfActual ehi_pref_mean 1063 -0.044228709 0.149576975
28 BS_vs_EOHI bs_28 ehi_val_mean 1063 0.011477853 0.708558862
29 BS_vs_EOHI bs_easy ehi_val_mean 1063 0.048673437 0.11273791
30 BS_vs_EOHI bs_hard ehi_val_mean 1063 -0.008316877 0.786509127
31 Cal_vs_EOHI cal_global ehi_val_mean 1063 -0.024447239 0.425886059
32 Cal_vs_EOHI cal_selfActual ehi_val_mean 1063 -0.099629152 0.001143742
33 BS_vs_EOHI bs_28 eohiDGEN_life 1063 -0.011893558 0.69851102
34 BS_vs_EOHI bs_easy eohiDGEN_life 1063 -0.087676604 0.004226705
35 BS_vs_EOHI bs_hard eohiDGEN_life 1063 0.036722132 0.231590633
36 Cal_vs_EOHI cal_global eohiDGEN_life 1063 0.004140068 0.892751642
37 Cal_vs_EOHI cal_selfActual eohiDGEN_life 1063 0.036877484 0.229620994
38 BS_vs_EOHI bs_28 eohiDGEN_mean 1063 -0.008492209 0.782120675
39 BS_vs_EOHI bs_easy eohiDGEN_mean 1063 -0.007458252 0.808095459
40 BS_vs_EOHI bs_hard eohiDGEN_mean 1063 -0.013570736 0.658521255
41 Cal_vs_EOHI cal_global eohiDGEN_mean 1063 0.019739833 0.520291178
42 Cal_vs_EOHI cal_selfActual eohiDGEN_mean 1063 0.016902147 0.582001734
43 BS_vs_EOHI bs_28 eohiDGEN_pers 1063 -0.013831247 0.652392826
44 BS_vs_EOHI bs_easy eohiDGEN_pers 1063 -0.013771822 0.653788727
45 BS_vs_EOHI bs_hard eohiDGEN_pers 1063 -0.013837654 0.652242395
46 Cal_vs_EOHI cal_global eohiDGEN_pers 1063 -0.010393397 0.735006441
47 Cal_vs_EOHI cal_selfActual eohiDGEN_pers 1063 -0.028354225 0.355720599
48 BS_vs_EOHI bs_28 eohiDGEN_pref 1063 0.002305478 0.940152072
49 BS_vs_EOHI bs_easy eohiDGEN_pref 1063 0.014268945 0.642148877
50 BS_vs_EOHI bs_hard eohiDGEN_pref 1063 -0.006108935 0.842308668
51 Cal_vs_EOHI cal_global eohiDGEN_pref 1063 0.024379777 0.427164421
52 Cal_vs_EOHI cal_selfActual eohiDGEN_pref 1063 0.008387657 0.784736723
53 BS_vs_EOHI bs_28 eohiDGEN_val 1063 -0.043411125 0.157255062
54 BS_vs_EOHI bs_easy eohiDGEN_val 1063 -0.058087693 0.058325548
55 BS_vs_EOHI bs_hard eohiDGEN_val 1063 -0.032241181 0.293618158
56 Cal_vs_EOHI cal_global eohiDGEN_val 1063 -0.032038903 0.296658895
57 Cal_vs_EOHI cal_selfActual eohiDGEN_val 1063 0.035471252 0.247887232

103
eohi1/correlation matrix.r Normal file
View File

@ -0,0 +1,103 @@
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
data <- df %>%
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
filter(demo_sex != "Prefer not to say")
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))
####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
# Print correlation matrix
print(round(cor_matrix, 3))
# Get significance tests for correlations using psych package
library(psych)
# Create correlation matrix with significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# Print correlation matrix
print(round(cor_test$r, 3))
# Print p-values
print(round(cor_test$p, 3))
# Print all correlations with r and p values (for reporting)
for(i in 1:nrow(cor_test$r)) {
for(j in 1:ncol(cor_test$r)) {
if(i != j) { # Skip diagonal
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
": r =", round(cor_test$r[i, j], 3),
", p =", round(cor_test$p[i, j], 3), "\n")
}
}
}
# Also print significant correlations summary
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
if(nrow(sig_cors) > 0) {
for(i in 1:nrow(sig_cors)) {
row_idx <- sig_cors[i, 1]
col_idx <- sig_cors[i, 2]
if(row_idx != col_idx) { # Skip diagonal
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
": r =", round(cor_test$r[row_idx, col_idx], 3),
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
}
}
}
# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)

View File

@ -0,0 +1,31 @@
Variable1,Variable2,Spearman_r,P_value
ehi_global_mean,AOT_total,0.084899905,0.005609163
ehi_life_mean,AOT_total,0.072949943,0.017368706
ehi_pref_mean,CRT_correct,0.068917794,0.024639893
ehi_pref_mean,CRT_int,-0.064728497,0.034848248
eohiDGEN_life,AOT_total,0.117946108,0.000116007
eohiDGEN_mean,AOT_total,0.111022973,0.000287049
eohiDGEN_mean,CRT_correct,0.066645932,0.029798224
eohiDGEN_pers,AOT_total,0.091596281,0.002797567
eohiDGEN_pref,AOT_total,0.068355715,0.025838342
eohiDGEN_val,AOT_total,0.103955004,0.000687468
eohiDGEN_val,CRT_correct,0.074789341,0.014729792
eohiDGEN_val,CRT_int,-0.06044435,0.048816455
eohiDGEN_pref,CRT_int,-0.058185337,0.057903079
ehi_global_mean,CRT_int,-0.057457963,0.061112
eohiDGEN_mean,CRT_int,-0.057307383,0.061794359
eohiDGEN_pref,CRT_correct,0.053610168,0.080621938
ehi_global_mean,CRT_correct,0.051329314,0.094394596
ehi_pers_mean,CRT_correct,0.051328061,0.094402649
ehi_pref_mean,AOT_total,0.05039887,0.100528033
ehi_pers_mean,CRT_int,-0.049058279,0.109918146
eohiDGEN_life,CRT_correct,0.039494214,0.198218491
ehi_val_mean,CRT_int,-0.037910532,0.216825816
ehi_pers_mean,AOT_total,0.031316388,0.307691267
eohiDGEN_pers,CRT_correct,0.025658263,0.403319679
eohiDGEN_pers,CRT_int,-0.017319242,0.572720797
ehi_life_mean,CRT_correct,0.016392301,0.593440889
ehi_val_mean,AOT_total,0.015193625,0.620731537
ehi_val_mean,CRT_correct,0.00599121,0.845308845
ehi_life_mean,CRT_int,-0.005811627,0.849889753
eohiDGEN_life,CRT_int,-0.001303724,0.966135022
1 Variable1 Variable2 Spearman_r P_value
1 Variable1 Variable2 Spearman_r P_value
2 ehi_global_mean AOT_total 0.084899905 0.005609163
3 ehi_life_mean AOT_total 0.072949943 0.017368706
4 ehi_pref_mean CRT_correct 0.068917794 0.024639893
5 ehi_pref_mean CRT_int -0.064728497 0.034848248
6 eohiDGEN_life AOT_total 0.117946108 0.000116007
7 eohiDGEN_mean AOT_total 0.111022973 0.000287049
8 eohiDGEN_mean CRT_correct 0.066645932 0.029798224
9 eohiDGEN_pers AOT_total 0.091596281 0.002797567
10 eohiDGEN_pref AOT_total 0.068355715 0.025838342
11 eohiDGEN_val AOT_total 0.103955004 0.000687468
12 eohiDGEN_val CRT_correct 0.074789341 0.014729792
13 eohiDGEN_val CRT_int -0.06044435 0.048816455
14 eohiDGEN_pref CRT_int -0.058185337 0.057903079
15 ehi_global_mean CRT_int -0.057457963 0.061112
16 eohiDGEN_mean CRT_int -0.057307383 0.061794359
17 eohiDGEN_pref CRT_correct 0.053610168 0.080621938
18 ehi_global_mean CRT_correct 0.051329314 0.094394596
19 ehi_pers_mean CRT_correct 0.051328061 0.094402649
20 ehi_pref_mean AOT_total 0.05039887 0.100528033
21 ehi_pers_mean CRT_int -0.049058279 0.109918146
22 eohiDGEN_life CRT_correct 0.039494214 0.198218491
23 ehi_val_mean CRT_int -0.037910532 0.216825816
24 ehi_pers_mean AOT_total 0.031316388 0.307691267
25 eohiDGEN_pers CRT_correct 0.025658263 0.403319679
26 eohiDGEN_pers CRT_int -0.017319242 0.572720797
27 ehi_life_mean CRT_correct 0.016392301 0.593440889
28 ehi_val_mean AOT_total 0.015193625 0.620731537
29 ehi_val_mean CRT_correct 0.00599121 0.845308845
30 ehi_life_mean CRT_int -0.005811627 0.849889753
31 eohiDGEN_life CRT_int -0.001303724 0.966135022

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,81 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Keep only required columns for the analysis
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Variables
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Compute pairwise correlations between every x/y variable combination,
# returned as one tidy row per pair: var_x, var_y, n, r, p, method.
# Uses pairwise-complete (finite) observations for each pair; pairs with
# fewer than 3 usable cases get NA estimates instead of calling cor.test().
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  pair_grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  out <- purrr::pmap_dfr(pair_grid, function(x, y) {
    a <- df[[x]]
    b <- df[[y]]
    usable <- is.finite(a) & is.finite(b)
    n_usable <- sum(usable)
    if (n_usable < 3) {
      # Too few cases for a correlation test: report the pair with NA stats.
      tibble::tibble(var_x = x, var_y = y, n = n_usable,
                     r = NA_real_, p = NA_real_, method = method)
    } else {
      fit <- suppressWarnings(cor.test(a[usable], b[usable], method = method))
      tibble::tibble(var_x = x, var_y = y, n = n_usable,
                     r = unname(fit$estimate), p = fit$p.value, method = method)
    }
  })
  dplyr::arrange(out, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
# Wide r-only tables (optional)
# Reshape a tidy correlation table to wide form: one row per var_x and
# one column of r values per var_y.
to_wide <- function(d) {
  wide_tbl <- tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
  wide_tbl
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)
# Export a single CSV combining all sets
corr_bs_eohi$group <- "BS_vs_EOHI"
corr_bs_cal$group <- "BS_vs_Cal"
corr_cal_eohi$group <- "Cal_vs_EOHI"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,303 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Compute a tidy table of descriptive statistics for the given columns of
# `data`: n, mean, sd, min, max, median, and the 25th/75th percentiles.
# Returns one row per variable with one column per statistic; all statistics
# except n are rounded to 5 decimal places.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
# across() produces one column per variable x statistic, named "<var>_<stat>".
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
# Long format: one row per "<var>_<stat>" name.
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
# Split on the LAST underscore only (lookahead regex), so variable names
# that themselves contain underscores (e.g. "eohiDGEN_pref") stay intact.
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
# Check for bimodal or unusual distributions
hist(df$eohi_pref)
hist(df$cal_selfActual)
# Look for extreme values
# boxplot(df$eohi_pref)
# boxplot(df$cal_selfActual)
# Test normality for each variable - probably unnecessary
library(nortest)
# Test EOHI variables
for(var in eohi_vars) {
cat("\n", var, "normality test:\n")
print(shapiro.test(df_complete[[var]]))
}
# Test calibration variables
for(var in cal_vars) {
cat("\n", var, "normality test:\n")
print(shapiro.test(df_complete[[var]]))
}
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Render a correlation matrix as strings with significance stars appended:
# *** p < .001, ** p < .01, * p < .05; values rounded to 5 decimal places.
# NA p-values (e.g. the diagonal returned by rcorr) produce a literal "NA"
# suffix via paste0, matching the nested-ifelse formulation.
corstars <- function(cor_mat, p_mat) {
  star_labels <- c("***", "**", "*", "")
  # findInterval maps each p-value into its significance band; NA stays NA.
  stars <- star_labels[findInterval(p_mat, c(0, 0.001, 0.01, 0.05))]
  # format() pads the rounded values to a common width before stars are added.
  cells <- paste0(format(round(cor_mat, 5), nsmall = 5), stars)
  matrix(cells, nrow = nrow(cor_mat),
         dimnames = list(rownames(cor_mat), colnames(cor_mat)))
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Correlation between two columns with a bootstrap percentile 95% CI.
#
# Args:
#   data:       data frame containing var1 and var2
#   var1, var2: column names (strings)
#   method:     correlation method passed to cor() ("pearson" or "spearman")
#   R:          number of bootstrap resamples
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
# Pairs with fewer than 3 complete cases return all-NA estimates.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Pairwise-complete cases for this variable pair only.
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  if (nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each bootstrap resample.
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)
  # boot.ci() returns NULL (with a warning) when the bootstrap replicates are
  # degenerate (e.g. zero variance); the previous code then errored on ci$perc,
  # aborting the whole pmap run. Guard so such pairs report NA bounds instead.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if (is.null(ci) || is.null(ci$perc)) {
    ci_lower <- NA_real_
    ci_upper <- NA_real_
  } else {
    ci_lower <- ci$perc[4]
    ci_upper <- ci$perc[5]
  }
  data.frame(
    correlation = boot_results$t0,
    ci_lower = ci_lower,
    ci_upper = ci_upper,
    n = nrow(complete_data)
  )
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")

View File

@ -0,0 +1,171 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load required libraries
library(corrplot)
library(Hmisc)
library(psych)
# Load the data
exp1_data <- read.csv("ehi1.csv")
# Define the two sets of variables
set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean")
set2_vars <- c("AOT_total", "CRT_correct", "CRT_int")
# Create subset with only the variables of interest
correlation_data <- exp1_data[, c(set1_vars, set2_vars)]
# ===== NORMALITY CHECKS =====
# Shapiro-Wilk tests for normality
for(var in names(correlation_data)) {
shapiro_result <- shapiro.test(correlation_data[[var]])
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
var, shapiro_result$p.value,
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
}
# Visual normality checks
pdf("normality_plots.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))
for(var in names(correlation_data)) {
# Histogram with normal curve overlay
hist(correlation_data[[var]], main = paste("Histogram:", var),
xlab = var, freq = FALSE)
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
sd = sd(correlation_data[[var]], na.rm = TRUE)),
add = TRUE, col = "red", lwd = 2)
# Q-Q plot
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
qqline(correlation_data[[var]], col = "red", lwd = 2)
}
dev.off()
# ===== LINEARITY CHECKS =====
# Check linearity between variable pairs
pdf("linearity_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
# Scatter plot with regression line
plot(correlation_data[[var1]], correlation_data[[var2]],
main = paste(var1, "vs", var2),
xlab = var1, ylab = var2, pch = 16, cex = 0.6)
# Add linear regression line
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
abline(lm_fit, col = "red", lwd = 2)
# Add LOESS smooth line for non-linear pattern detection
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
loess_pred <- predict(loess_fit, x_seq)
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
# Calculate R-squared for linear fit
r_squared <- summary(lm_fit)$r.squared
cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
}
}
dev.off()
# Residual analysis for linearity
pdf("residual_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
residuals <- residuals(lm_fit)
fitted <- fitted(lm_fit)
plot(fitted, residuals,
main = paste("Residuals:", var1, "vs", var2),
xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
abline(h = 0, col = "red", lwd = 2)
# Add smooth line to residuals
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
}
}
dev.off()
# Calculate correlation matrix (Spearman only)
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
# Print correlation matrix with 5 decimal places
print(round(cor_matrix_spearman, 5))
# Separate correlations between the two sets (Spearman)
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))
# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
# Statistical significance tests (Spearman)
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
p_val <- cor_test_results_spearman$P[var1, var2]
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
}
}
# Create correlation plot (Spearman only)
pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8)
corrplot(cor_matrix_spearman, method = "color", type = "upper",
order = "hclust", tl.cex = 0.8, tl.col = "black",
addCoef.col = "black", number.cex = 0.7,
title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures")
dev.off()
# Summary statistics
desc_stats <- describe(correlation_data)
print(round(desc_stats, 5))
# Save results to CSV files
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv")
write.csv(round(desc_stats, 5), "descriptive_statistics.csv")
# Save correlation results in a formatted table
cor_results <- data.frame(
Variable1 = character(),
Variable2 = character(),
Spearman_r = numeric(),
P_value = numeric(),
stringsAsFactors = FALSE
)
# Extract significant correlations between sets
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
r_val <- cor_matrix_spearman[var1, var2]
p_val <- cor_test_results_spearman$P[var1, var2]
cor_results <- rbind(cor_results, data.frame(
Variable1 = var1,
Variable2 = var2,
Spearman_r = r_val,
P_value = p_val,
stringsAsFactors = FALSE
))
}
}
write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE)

View File

@ -0,0 +1,45 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load the data
exp1_data <- read.csv("exp1.csv")
# Define all NPastDiff and NFutDiff variables
all_diff_vars <- c(
"NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
"NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
"NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
"NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change",
"NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel",
"NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex",
"NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice",
"NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change"
)
# Define DGEN variables to average
dgen_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
# Calculate domain_mean as average of all 40 variables
exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE)
# Calculate DGEN_mean as average of all 8 DGEN variables
exp1_data$DGEN_mean <- rowMeans(exp1_data[, dgen_vars], na.rm = TRUE)
# Save the updated data
write.csv(exp1_data, "exp1.csv", row.names = FALSE)
# Display summary of the calculated means
cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n")
summary(exp1_data$domain_mean)
cat("\nDGEN mean summary (average of all 8 DGEN variables):\n")
summary(exp1_data$DGEN_mean)
# Show first few rows to verify calculations
cat("\nFirst 5 rows of calculated means:\n")
print(exp1_data[1:5, c("domain_mean", "DGEN_mean")])
# Show the individual DGEN values for first 5 rows to verify math
cat("\nFirst 5 rows of individual DGEN values for verification:\n")
print(exp1_data[1:5, dgen_vars])

View File

@ -0,0 +1,104 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
ehi1 <- read.csv("ehi1.csv")
# Create EHI difference variables (NPast - NFut)
# Preferences
ehi1$ehi_pref_read <- ehi1$NPastDiff_pref_read - ehi1$NFutDiff_pref_read
ehi1$ehi_pref_music <- ehi1$NPastDiff_pref_music - ehi1$NFutDiff_pref_music
ehi1$ehi_pref_tv <- ehi1$NPastDiff_pref_tv - ehi1$NFutDiff_pref_tv
ehi1$ehi_pref_nap <- ehi1$NPastDiff_pref_nap - ehi1$NFutDiff_pref_nap
ehi1$ehi_pref_travel <- ehi1$NPastDiff_pref_travel - ehi1$NFutDiff_pref_travel
# Personality
ehi1$ehi_pers_extravert <- ehi1$NPastDiff_pers_extravert - ehi1$NFutDiff_pers_extravert
ehi1$ehi_pers_critical <- ehi1$NPastDiff_pers_critical - ehi1$NFutDiff_pers_critical
ehi1$ehi_pers_dependable <- ehi1$NPastDiff_pers_dependable - ehi1$NFutDiff_pers_dependable
ehi1$ehi_pers_anxious <- ehi1$NPastDiff_pers_anxious - ehi1$NFutDiff_pers_anxious
ehi1$ehi_pers_complex <- ehi1$NPastDiff_pers_complex - ehi1$NFutDiff_pers_complex
# Values
ehi1$ehi_val_obey <- ehi1$NPastDiff_val_obey - ehi1$NFutDiff_val_obey
ehi1$ehi_val_trad <- ehi1$NPastDiff_val_trad - ehi1$NFutDiff_val_trad
ehi1$ehi_val_opinion <- ehi1$NPastDiff_val_opinion - ehi1$NFutDiff_val_opinion
ehi1$ehi_val_performance <- ehi1$NPastDiff_val_performance - ehi1$NFutDiff_val_performance
ehi1$ehi_val_justice <- ehi1$NPastDiff_val_justice - ehi1$NFutDiff_val_justice
# Life satisfaction
ehi1$ehi_life_ideal <- ehi1$NPastDiff_life_ideal - ehi1$NFutDiff_life_ideal
ehi1$ehi_life_excellent <- ehi1$NPastDiff_life_excellent - ehi1$NFutDiff_life_excellent
ehi1$ehi_life_satisfied <- ehi1$NPastDiff_life_satisfied - ehi1$NFutDiff_life_satisfied
ehi1$ehi_life_important <- ehi1$NPastDiff_life_important - ehi1$NFutDiff_life_important
ehi1$ehi_life_change <- ehi1$NPastDiff_life_change - ehi1$NFutDiff_life_change
# QA: Verify calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI difference calculations (NPast - NFut)\n\n")
qa_pairs <- list(
list(npast = "NPastDiff_pref_read", nfut = "NFutDiff_pref_read", target = "ehi_pref_read"),
list(npast = "NPastDiff_pref_music", nfut = "NFutDiff_pref_music", target = "ehi_pref_music"),
list(npast = "NPastDiff_pref_tv", nfut = "NFutDiff_pref_tv", target = "ehi_pref_tv"),
list(npast = "NPastDiff_pref_nap", nfut = "NFutDiff_pref_nap", target = "ehi_pref_nap"),
list(npast = "NPastDiff_pref_travel", nfut = "NFutDiff_pref_travel", target = "ehi_pref_travel"),
list(npast = "NPastDiff_pers_extravert", nfut = "NFutDiff_pers_extravert", target = "ehi_pers_extravert"),
list(npast = "NPastDiff_pers_critical", nfut = "NFutDiff_pers_critical", target = "ehi_pers_critical"),
list(npast = "NPastDiff_pers_dependable", nfut = "NFutDiff_pers_dependable", target = "ehi_pers_dependable"),
list(npast = "NPastDiff_pers_anxious", nfut = "NFutDiff_pers_anxious", target = "ehi_pers_anxious"),
list(npast = "NPastDiff_pers_complex", nfut = "NFutDiff_pers_complex", target = "ehi_pers_complex"),
list(npast = "NPastDiff_val_obey", nfut = "NFutDiff_val_obey", target = "ehi_val_obey"),
list(npast = "NPastDiff_val_trad", nfut = "NFutDiff_val_trad", target = "ehi_val_trad"),
list(npast = "NPastDiff_val_opinion", nfut = "NFutDiff_val_opinion", target = "ehi_val_opinion"),
list(npast = "NPastDiff_val_performance", nfut = "NFutDiff_val_performance", target = "ehi_val_performance"),
list(npast = "NPastDiff_val_justice", nfut = "NFutDiff_val_justice", target = "ehi_val_justice"),
list(npast = "NPastDiff_life_ideal", nfut = "NFutDiff_life_ideal", target = "ehi_life_ideal"),
list(npast = "NPastDiff_life_excellent", nfut = "NFutDiff_life_excellent", target = "ehi_life_excellent"),
list(npast = "NPastDiff_life_satisfied", nfut = "NFutDiff_life_satisfied", target = "ehi_life_satisfied"),
list(npast = "NPastDiff_life_important", nfut = "NFutDiff_life_important", target = "ehi_life_important"),
list(npast = "NPastDiff_life_change", nfut = "NFutDiff_life_change", target = "ehi_life_change")
)
all_checks_passed <- TRUE
for (pair in qa_pairs) {
# Calculate expected difference
expected_diff <- ehi1[[pair$npast]] - ehi1[[pair$nfut]]
# Get actual value in target variable
actual_value <- ehi1[[pair$target]]
# Compare (allowing for floating point precision issues)
discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)
if (length(discrepancies) > 0) {
cat(sprintf("FAIL: %s\n", pair$target))
cat(sprintf(" Found %d discrepancies in rows: %s\n",
length(discrepancies),
paste(head(discrepancies, 10), collapse = ", ")))
# Show first discrepancy details
row_num <- discrepancies[1]
cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
row_num,
pair$npast, ehi1[[pair$npast]][row_num],
pair$nfut, ehi1[[pair$nfut]][row_num],
expected_diff[row_num],
pair$target, actual_value[row_num]))
all_checks_passed <- FALSE
} else {
cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(ehi1)))
}
}
cat("\n")
if (all_checks_passed) {
cat("*** ALL QA CHECKS PASSED ***\n")
} else {
cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}
# Save updated dataset
write.csv(ehi1, "ehi1.csv", row.names = FALSE)
cat("\nDataset saved to ehi1.csv\n")

View File

@ -0,0 +1,167 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
ehi1 <- read.csv("ehi1.csv")
# Item-name vectors for each EHI subscale (5 items each) — declared once so
# the global mean is guaranteed to use exactly the same 20 columns.
pref_items <- c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv",
                "ehi_pref_nap", "ehi_pref_travel")
pers_items <- c("ehi_pers_extravert", "ehi_pers_critical",
                "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex")
val_items <- c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion",
               "ehi_val_performance", "ehi_val_justice")
life_items <- c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied",
                "ehi_life_important", "ehi_life_change")
# Subscale means (NA-tolerant row means)
ehi1$ehi_pref_mean <- rowMeans(ehi1[, pref_items], na.rm = TRUE)
ehi1$ehi_pers_mean <- rowMeans(ehi1[, pers_items], na.rm = TRUE)
ehi1$ehi_val_mean <- rowMeans(ehi1[, val_items], na.rm = TRUE)
ehi1$ehi_life_mean <- rowMeans(ehi1[, life_items], na.rm = TRUE)
# Global mean across all 20 items
ehi1$ehi_global_mean <- rowMeans(ehi1[, c(pref_items, pers_items,
                                          val_items, life_items)], na.rm = TRUE)
# QA: Verify mean calculations by recomputing row means for the first 5 rows
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI mean calculations\n\n")
# Helper: for rows 1-5, recompute the mean of `vars` and compare it against
# the stored column values, printing one line per row with a check mark.
# 5-item subscales print the individual values; the 20-item global mean
# prints a summary line instead.
qa_first5 <- function(vars, stored) {
  for (i in 1:5) {
    vals <- unlist(ehi1[i, vars], use.names = FALSE)
    recomputed <- mean(vals, na.rm = TRUE)
    ok <- abs(recomputed - stored[i]) < 1e-10
    flag <- ifelse(ok, "✓", "✗")
    if (length(vals) == 5) {
      cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
                  i, vals[1], vals[2], vals[3], vals[4], vals[5],
                  recomputed, stored[i], flag))
    } else {
      cat(sprintf("Row %d: 20 values → Calculated: %.5f | Actual: %.5f %s\n",
                  i, recomputed, stored[i], flag))
    }
  }
}
pref_vars <- c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv",
               "ehi_pref_nap", "ehi_pref_travel")
pers_vars <- c("ehi_pers_extravert", "ehi_pers_critical",
               "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex")
val_vars <- c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion",
              "ehi_val_performance", "ehi_val_justice")
life_vars <- c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied",
               "ehi_life_important", "ehi_life_change")
cat("--- FIRST 5 ROWS: PREFERENCES MEAN ---\n")
qa_first5(pref_vars, ehi1$ehi_pref_mean)
cat("\n--- FIRST 5 ROWS: PERSONALITY MEAN ---\n")
qa_first5(pers_vars, ehi1$ehi_pers_mean)
cat("\n--- FIRST 5 ROWS: VALUES MEAN ---\n")
qa_first5(val_vars, ehi1$ehi_val_mean)
cat("\n--- FIRST 5 ROWS: LIFE SATISFACTION MEAN ---\n")
qa_first5(life_vars, ehi1$ehi_life_mean)
cat("\n--- FIRST 5 ROWS: GLOBAL MEAN (20 variables) ---\n")
qa_first5(c(pref_vars, pers_vars, val_vars, life_vars), ehi1$ehi_global_mean)
# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")
qa_checks <- list(
  list(vars = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv", "ehi_pref_nap", "ehi_pref_travel"),
       target = "ehi_pref_mean", name = "Preferences"),
  list(vars = c("ehi_pers_extravert", "ehi_pers_critical", "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex"),
       target = "ehi_pers_mean", name = "Personality"),
  list(vars = c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion", "ehi_val_performance", "ehi_val_justice"),
       target = "ehi_val_mean", name = "Values"),
  list(vars = c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied", "ehi_life_important", "ehi_life_change"),
       target = "ehi_life_mean", name = "Life Satisfaction"),
  list(vars = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv", "ehi_pref_nap", "ehi_pref_travel",
                "ehi_pers_extravert", "ehi_pers_critical", "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex",
                "ehi_val_obey", "ehi_val_trad", "ehi_val_opinion", "ehi_val_performance", "ehi_val_justice",
                "ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied", "ehi_life_important", "ehi_life_change"),
       target = "ehi_global_mean", name = "Global")
)
# Run one check: recompute the row means over all rows, compare to the stored
# column, print PASS/FAIL, and return TRUE when the column is consistent.
run_check <- function(check) {
  recomputed <- rowMeans(ehi1[, check$vars], na.rm = TRUE)
  stored <- ehi1[[check$target]]
  bad <- which(abs(recomputed - stored) > 1e-10)
  if (length(bad) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                check$name, length(check$vars), nrow(ehi1)))
    return(TRUE)
  }
  cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars)))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(bad),
              paste(head(bad, 10), collapse = ", ")))
  FALSE
}
all_checks_passed <- all(vapply(qa_checks, run_check, logical(1)))
cat("\n")
cat(if (all_checks_passed) "*** ALL QA CHECKS PASSED ***\n" else "*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
# Save updated dataset
write.csv(ehi1, "ehi1.csv", row.names = FALSE)
cat("\nDataset saved to ehi1.csv\n")

View File

@ -0,0 +1,38 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
data <- read.csv("ehi1.csv")
# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))
# Also show the unique values and their frequencies.
# BUG FIX: print("\n...") displays the escape sequence literally
# ('[1] "\nUnique..."'); cat() is required to emit a real newline.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))
# Recode demo_edu into 3 ordinal levels; values not matched below stay NA
data$edu3 <- NA
# HS_TS: High School and Trade School
data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS"
# C_Ug: College and University - Undergraduate
data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug"
# grad_prof: University - Graduate, University - PhD, and Professional Degree
data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof"
# Convert to ordered factor (HS_TS < C_Ug < grad_prof)
data$edu3 <- factor(data$edu3,
levels = c("HS_TS", "C_Ug", "grad_prof"),
ordered = TRUE)
# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))
# Verify the recoding: cross-tab of original vs recoded; any demo_edu value
# not covered by the mappings above shows up in the NA column here
print(table(data$demo_edu, data$edu3, useNA = "ifany"))
# Save the updated dataset with the new edu3 variable
write.csv(data, "ehi1.csv", row.names = FALSE)

View File

@ -0,0 +1,14 @@
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
"eohiDGEN_pref",1,1063,0.31138,2.29712,0,0.27497,1.4826,-10,10,20,0.16174,2.49869,0.07046
"eohiDGEN_pers",2,1063,0.44309,2.40063,0,0.39365,1.4826,-10,10,20,0.04618,2.20622,0.07363
"eohiDGEN_val",3,1063,0.49012,2.38159,0,0.3772,1.4826,-10,10,20,0.37348,2.6545,0.07305
"eohiDGEN_life",4,1063,0.41016,2.72624,0,0.3866,1.4826,-10,10,20,0.06705,1.55144,0.08362
"eohiDGEN_mean",5,1063,0.41488,1.7406,0,0.34323,0.99334,-9,9.33,18.33,0.43898,2.96172,0.05339
"ehi_pref_mean",6,1063,0.13885,0.64266,0,0.09683,0.29652,-2.2,3.2,5.4,0.93756,3.43951,0.01971
"ehi_pers_mean",7,1063,0.09614,0.71979,0,0.07027,0.59304,-3.6,3.8,7.4,0.53524,3.53968,0.02208
"ehi_val_mean",8,1063,0.19699,0.72241,0,0.14266,0.29652,-3.4,4.4,7.8,1.17086,5.08196,0.02216
"ehi_life_mean",9,1063,0.1127,1.12639,0,0.06792,0.59304,-4.8,6,10.8,0.45879,2.97822,0.03455
"ehi_global_mean",10,1063,0.13617,0.50405,0.05,0.09871,0.37065,-1.65,2.65,4.3,1.09805,3.7645,0.01546
"AOT_total",11,1063,0.6945,0.60845,0.75,0.69859,0.7413,-1.125,2,3.125,-0.09166,-0.61789,0.01866
"CRT_correct",12,1063,0.3132,0.36479,0.33,0.26666,0.48926,0,1,1,0.79518,-0.77639,0.01119
"CRT_int",13,1063,0.614,0.36086,0.67,0.6424,0.48926,0,1,1,-0.45012,-1.11195,0.01107
1 vars n mean sd median trimmed mad min max range skew kurtosis se
1 vars n mean sd median trimmed mad min max range skew kurtosis se
2 eohiDGEN_pref 1 1063 0.31138 2.29712 0 0.27497 1.4826 -10 10 20 0.16174 2.49869 0.07046
3 eohiDGEN_pers 2 1063 0.44309 2.40063 0 0.39365 1.4826 -10 10 20 0.04618 2.20622 0.07363
4 eohiDGEN_val 3 1063 0.49012 2.38159 0 0.3772 1.4826 -10 10 20 0.37348 2.6545 0.07305
5 eohiDGEN_life 4 1063 0.41016 2.72624 0 0.3866 1.4826 -10 10 20 0.06705 1.55144 0.08362
6 eohiDGEN_mean 5 1063 0.41488 1.7406 0 0.34323 0.99334 -9 9.33 18.33 0.43898 2.96172 0.05339
7 ehi_pref_mean 6 1063 0.13885 0.64266 0 0.09683 0.29652 -2.2 3.2 5.4 0.93756 3.43951 0.01971
8 ehi_pers_mean 7 1063 0.09614 0.71979 0 0.07027 0.59304 -3.6 3.8 7.4 0.53524 3.53968 0.02208
9 ehi_val_mean 8 1063 0.19699 0.72241 0 0.14266 0.29652 -3.4 4.4 7.8 1.17086 5.08196 0.02216
10 ehi_life_mean 9 1063 0.1127 1.12639 0 0.06792 0.59304 -4.8 6 10.8 0.45879 2.97822 0.03455
11 ehi_global_mean 10 1063 0.13617 0.50405 0.05 0.09871 0.37065 -1.65 2.65 4.3 1.09805 3.7645 0.01546
12 AOT_total 11 1063 0.6945 0.60845 0.75 0.69859 0.7413 -1.125 2 3.125 -0.09166 -0.61789 0.01866
13 CRT_correct 12 1063 0.3132 0.36479 0.33 0.26666 0.48926 0 1 1 0.79518 -0.77639 0.01119
14 CRT_int 13 1063 0.614 0.36086 0.67 0.6424 0.48926 0 1 1 -0.45012 -1.11195 0.01107

View File

@ -0,0 +1,107 @@
library(tidyverse)
library(ggplot2)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read data
data <- read.csv("exp1.csv")
# Select variables ending exactly with _T or _F
df <- data %>% select(matches("(_T|_F)$"))
# Remove demo_f variable (if present)
df <- df %>% select(-any_of("demo_f"))
str(df)
# Coerce to numeric where possible (without breaking non-numeric)
df_num <- df %>%
  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))
# Compute count and proportion correct per variable.
# Item names encode an intended difficulty (e.g. "gest_75_T" -> 75), which
# maps to an expected proportion-correct range checked below.
descriptives <- purrr::imap_dfr(df_num, function(col, name) {
  x <- suppressWarnings(as.numeric(col))
  x <- x[!is.na(x)]
  n_total <- length(x)
  n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1)
  prop <- if (n_total == 0) NA_real_ else n_correct / n_total
  # Extract difficulty number from variable name and map to expected range.
  # suppressWarnings: names without a numeric suffix yield NA quietly
  # instead of emitting a coercion warning per column.
  difficulty_num <- suppressWarnings(as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)))
  expected_ranges <- list(
    "15" = c(0.15, 0.25),
    "35" = c(0.35, 0.45),
    "55" = c(0.55, 0.65),
    "75" = c(0.75, 0.85)
  )
  # BUG FIX: when a column has no usable values, prop is NA and the original
  # `if (prop >= ...)` comparison errored with
  # "missing value where TRUE/FALSE needed"; report UNKNOWN instead.
  if (is.na(prop) || !(as.character(difficulty_num) %in% names(expected_ranges))) {
    match_difficulty <- "UNKNOWN"
  } else {
    expected_range <- expected_ranges[[as.character(difficulty_num)]]
    match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO"
  }
  tibble(
    variable = name,
    n_total = n_total,
    n_correct = n_correct,
    prop_correct = round(prop, 5),
    match_difficulty = match_difficulty
  )
}) %>%
  arrange(variable)
# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin
bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09))
bin_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(0.10, 1.00, by = 0.10),
  right = FALSE,
  include.lowest = FALSE,
  labels = bin_levels
)
bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")
# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94
bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09))
bin15_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(0.15, 0.95, by = 0.10),
  right = FALSE,
  include.lowest = FALSE,
  labels = bin15_levels
)
bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")
# View
print(descriptives, n = Inf)
cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n")
print(bin_counts, n = Inf)
cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n")
print(bin15_counts, n = Inf)
# Histogram of proportion correct with custom bins
histogram <- ggplot(descriptives, aes(x = prop_correct)) +
  geom_histogram(
    breaks = seq(0.15, 0.95, by = 0.10),
    fill = "lightblue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Distribution of Proportion Correct",
    x = "Proportion Correct",
    y = "Number of Variables"
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = seq(0.15, 0.95, by = 0.10))
print(histogram)
# Optionally save
# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")

View File

@ -0,0 +1,88 @@
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
options(scipen = 999)
df <- read.csv("ehi1.csv")
library(psych)
library(knitr)
# fixed-decimal formatter (five decimals)
fmt5 <- function(x) formatC(x, format = "f", digits = 5)
# Select the 2 composite variables for reliability analysis (complete cases only)
# (original comment said "4 variables" — only two columns are used)
reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]),
c("eohiDGEN_mean", "ehi_global_mean")]
# Cronbach's Alpha
alpha_result <- alpha(reliability_data)
# Split-half reliability
split_half <- splitHalf(reliability_data)
# Inter-item correlations
cor_matrix <- cor(reliability_data, use = "complete.obs")
# Two-item reliability summary (if applicable)
two_item_section <- ""
if (ncol(reliability_data) == 2) {
n_complete <- sum(complete.cases(reliability_data))
r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson")
r_spearman <- suppressWarnings(cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "spearman"))
# Fisher z CI for r
fisher_z <- atanh(r_pearson)
se_z <- 1 / sqrt(n_complete - 3)
z_crit <- qnorm(0.975)
ci_z_lower <- fisher_z - z_crit * se_z
ci_z_upper <- fisher_z + z_crit * se_z
ci_r_lower <- tanh(ci_z_lower)
ci_r_upper <- tanh(ci_z_upper)
# Approximate Fisher z CI for Spearman rho (large-sample approximation)
fisher_z_s <- atanh(r_spearman)
ci_zs_lower <- fisher_z_s - z_crit * se_z
ci_zs_upper <- fisher_z_s + z_crit * se_z
ci_s_lower <- tanh(ci_zs_lower)
ci_s_upper <- tanh(ci_zs_upper)
# Spearman-Brown / Cronbach's alpha for k = 2
alpha_sb <- (2 * r_pearson) / (1 + r_pearson)
alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower)
alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper)
# BUG FIX: report text read "SpearmanBrown" (hyphen lost to mis-encoding)
two_item_section <- paste0(
"<h2>Two-Item Reliability Summary</h2>",
"<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>",
"<p>Spearman r: ", fmt5(r_spearman), " (95% CI: [", fmt5(ci_s_lower), ", ", fmt5(ci_s_upper), "])</p>",
"<p>Spearman-Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb),
" (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>"
)
}
# Create a summary table of descriptive statistics for the report
summary_table <- data.frame(
Variable = names(reliability_data),
n = nrow(reliability_data),
Mean = round(colMeans(reliability_data, na.rm = TRUE), 5),
SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5),
Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5),
Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5),
Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5)
)
# BUG FIX: summary_table and cor_matrix were computed but never written into
# the report; both are now included.
html_output <- paste0(
"<html><head><title>EHI Reliability Analysis</title></head><body>",
"<h1>EHI Reliability Analysis</h1>",
"<h2>Descriptive Statistics</h2>",
kable(summary_table, format = "html"),
two_item_section,
"<h2>Cronbach's Alpha</h2>",
kable(alpha_result$total, format = "html"),
"<h2>Inter-Item Correlations</h2>",
kable(cor_matrix, format = "html"),
"<h2>Split-Half Reliability</h2>",
"<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>",
"<h2>Item-Level Statistics</h2>",
kable(alpha_result$item.stats, format = "html"),
"</body></html>"
)
writeLines(html_output, "EHI reliability.html")

Binary file not shown.

Before

Width:  |  Height:  |  Size: 177 KiB

After

Width:  |  Height:  |  Size: 177 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 176 KiB

After

Width:  |  Height:  |  Size: 176 KiB

1064
eohi1/ehi1 - Copy.csv Normal file

File diff suppressed because it is too large Load Diff

1064
eohi1/ehi1.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,81 @@
eohi_var,cal_var,n,pearson_r,pearson_ci_lower,pearson_ci_upper,pearson_p,spearman_rho,spearman_ci_lower,spearman_ci_upper,spearman_p
eohi_mean,cal_selfActual,1063,-0.09447,-0.15019,-0.0362,0.00205,-0.11288,-0.1699,-0.05351,0.00023
eohi_val,cal_selfActual,1063,-0.09384,-0.15347,-0.03616,0.00219,-0.09963,-0.16267,-0.04046,0.00114
eohi_pers,cal_selfActual,1063,-0.07652,-0.12898,-0.01613,0.01258,-0.08281,-0.14357,-0.02535,0.00691
eohiDGEN_pref,cal_15,1063,0.11449,0.04892,0.17239,0.00018,0.08071,0.01776,0.13995,0.00847
eohi_pers,cal_55,1063,-0.06004,-0.1185,-0.0065,0.05036,-0.07699,-0.13816,-0.01519,0.01204
eohi_mean,cal_55,1063,-0.06632,-0.11782,-0.01552,0.03061,-0.07588,-0.13634,-0.01868,0.01333
eohiDGEN_val,cal_55,1063,-0.05006,-0.1073,0.00598,0.10281,-0.06964,-0.12753,-0.01077,0.02318
eohiDGEN_mean,cal_true,1063,0.05754,-0.0009,0.11652,0.06074,0.0676,0.00725,0.12602,0.02754
eohiDGEN_life,cal_75,1063,-0.04564,-0.10516,0.01314,0.13702,-0.05638,-0.11573,0.00538,0.06615
eohi_life,cal_75,1063,-0.03875,-0.09577,0.01879,0.20683,-0.05565,-0.11564,0.00491,0.06974
eohiDGEN_mean,cal_15,1063,0.07628,0.01598,0.13615,0.01285,0.05395,-0.00598,0.11493,0.0787
eohi_val,cal_55,1063,-0.05666,-0.10898,-0.00182,0.06479,-0.05346,-0.10885,0.00265,0.0815
eohiDGEN_life,cal_55,1063,0.05044,-0.00773,0.10614,0.10026,0.04956,-0.00878,0.11087,0.10635
eohiDGEN_pref,cal_true,1063,0.07292,0.0095,0.134,0.01741,0.04803,-0.0094,0.108,0.11761
eohi_pref,cal_selfActual,1063,-0.03281,-0.09762,0.032,0.28519,-0.04423,-0.10373,0.01867,0.14958
eohiDGEN_val,cal_false,1063,-0.02787,-0.09239,0.03411,0.36398,-0.04393,-0.10451,0.01416,0.15236
eohiDGEN_pers,cal_15,1063,0.04714,-0.01715,0.10944,0.12451,0.04308,-0.01746,0.10317,0.16046
eohiDGEN_life,cal_false,1063,0.03476,-0.02508,0.09492,0.25753,0.04047,-0.02102,0.10411,0.18735
eohi_life,cal_true,1063,-0.02269,-0.08192,0.0416,0.45982,-0.03779,-0.0985,0.01964,0.21833
eohiDGEN_life,cal_15,1063,0.05091,-0.01796,0.11623,0.09709,0.03755,-0.02524,0.09835,0.22121
eohiDGEN_life,cal_true,1063,-0.00526,-0.06612,0.05833,0.86395,-0.03744,-0.09633,0.02643,0.22263
eohiDGEN_life,cal_selfActual,1063,0.02779,-0.03579,0.0868,0.36538,0.03688,-0.02625,0.10029,0.22962
eohi_val,cal_75,1063,-0.03432,-0.09303,0.02511,0.26362,-0.03548,-0.09664,0.02788,0.24775
eohiDGEN_val,cal_selfActual,1063,0.02114,-0.04078,0.08546,0.49105,0.03547,-0.02636,0.09663,0.24789
eohiDGEN_pers,cal_true,1063,0.02354,-0.03617,0.08422,0.44334,0.03532,-0.02544,0.09158,0.24994
eohiDGEN_pers,cal_35,1063,-0.03495,-0.09252,0.02312,0.25493,-0.03489,-0.09183,0.02535,0.25578
eohiDGEN_pers,cal_75,1063,-0.01611,-0.07516,0.04168,0.59971,-0.03379,-0.08928,0.02687,0.271
eohiDGEN_pers,cal_false,1063,-0.02503,-0.08151,0.03334,0.4149,-0.03266,-0.08708,0.02624,0.28745
eohiDGEN_val,cal_global,1063,-0.00447,-0.05803,0.05482,0.8843,-0.03204,-0.08825,0.0252,0.29666
eohiDGEN_mean,cal_55,1063,-0.03699,-0.08943,0.01668,0.2282,-0.02929,-0.08473,0.03097,0.34006
eohiDGEN_pers,cal_selfActual,1063,-0.03827,-0.10619,0.02517,0.21254,-0.02835,-0.09145,0.03191,0.35572
eohi_life,cal_selfActual,1063,0.04737,-0.01652,0.10847,0.12274,0.02825,-0.03176,0.08666,0.35753
eohi_mean,cal_75,1063,-0.01187,-0.06715,0.04855,0.69916,-0.0275,-0.0859,0.03279,0.37045
eohiDGEN_val,cal_15,1063,0.00914,-0.04676,0.06618,0.76607,-0.02707,-0.08456,0.02837,0.37789
eohi_pref,cal_55,1063,-0.02632,-0.08106,0.02643,0.39136,-0.02524,-0.08705,0.03558,0.41101
eohiDGEN_pref,cal_35,1063,-0.00852,-0.0681,0.0469,0.78147,-0.02501,-0.08136,0.03028,0.41525
eohi_val,cal_global,1063,-0.03245,-0.08628,0.02097,0.29049,-0.02445,-0.0837,0.03405,0.42589
eohiDGEN_pref,cal_global,1063,0.05135,-0.00854,0.10781,0.09423,0.02438,-0.03698,0.07747,0.42716
eohi_mean,cal_global,1063,-0.00987,-0.0669,0.04625,0.74782,-0.02423,-0.08093,0.03538,0.43002
eohi_mean,cal_false,1063,0.00103,-0.05609,0.05796,0.97329,-0.02413,-0.08528,0.034,0.43184
eohi_pers,cal_15,1063,0.0424,-0.01973,0.10006,0.16717,0.02377,-0.03499,0.07967,0.4388
eohiDGEN_pref,cal_75,1063,0.04194,-0.01865,0.10462,0.17182,0.02253,-0.03687,0.08394,0.46315
eohi_val,cal_35,1063,0.00919,-0.04488,0.06395,0.76478,-0.02132,-0.07724,0.0359,0.48748
eohiDGEN_mean,cal_75,1063,0.02182,-0.03779,0.08058,0.47734,0.02086,-0.0387,0.08009,0.49684
eohi_val,cal_15,1063,-0.0114,-0.06675,0.04342,0.71036,0.02045,-0.0384,0.08265,0.50533
eohiDGEN_mean,cal_global,1063,0.01728,-0.04015,0.07341,0.5736,0.01974,-0.03874,0.07654,0.52029
eohiDGEN_val,cal_true,1063,0.03211,-0.02651,0.08716,0.29554,0.01936,-0.04555,0.08179,0.52834
eohi_mean,cal_15,1063,0.02437,-0.03634,0.08546,0.42732,0.01772,-0.03986,0.07648,0.56377
eohiDGEN_pers,cal_55,1063,-0.02109,-0.07397,0.03478,0.49211,-0.0175,-0.0759,0.04546,0.56869
eohiDGEN_mean,cal_selfActual,1063,0.00131,-0.05794,0.06413,0.96594,0.0169,-0.04525,0.07532,0.582
eohi_life,cal_false,1063,0.02929,-0.02351,0.0794,0.34004,0.01689,-0.04339,0.07467,0.58238
eohiDGEN_mean,cal_false,1063,-0.02032,-0.07402,0.03873,0.5082,-0.01576,-0.07583,0.04145,0.60773
eohiDGEN_pref,cal_55,1063,-0.01025,-0.064,0.04642,0.73848,-0.01542,-0.0708,0.046,0.61559
eohi_life,cal_35,1063,0.03599,-0.02203,0.08892,0.24108,0.01538,-0.04474,0.07414,0.61636
eohi_pers,cal_false,1063,-0.00935,-0.06513,0.04644,0.76086,-0.01485,-0.07336,0.04336,0.62859
eohi_life,cal_15,1063,0.0156,-0.0452,0.07483,0.61145,0.01432,-0.04315,0.07438,0.64092
eohi_pers,cal_true,1063,0.0055,-0.05876,0.07464,0.85794,-0.01291,-0.06976,0.04539,0.67419
eohi_pref,cal_75,1063,0.02725,-0.03153,0.08793,0.37474,0.01264,-0.0447,0.07576,0.68049
eohi_mean,cal_true,1063,-0.01786,-0.07676,0.04265,0.56079,-0.01207,-0.07091,0.05063,0.69435
eohi_val,cal_false,1063,-0.01276,-0.07529,0.04177,0.6777,-0.0115,-0.0741,0.04742,0.7081
eohi_pers,cal_35,1063,0.01428,-0.04037,0.07111,0.6418,0.01092,-0.04486,0.06885,0.7222
eohiDGEN_pers,cal_global,1063,-0.00723,-0.06312,0.05198,0.81387,-0.01039,-0.06483,0.05266,0.73501
eohi_mean,cal_35,1063,0.02233,-0.03257,0.08046,0.46697,-0.01029,-0.06915,0.04512,0.73748
eohi_pref,cal_global,1063,0.0183,-0.03841,0.07643,0.55118,0.00972,-0.0475,0.07007,0.75165
eohi_pers,cal_global,1063,-0.00467,-0.06118,0.05112,0.87911,-0.00916,-0.06635,0.05143,0.76546
eohiDGEN_pref,cal_selfActual,1063,0.02085,-0.03875,0.07863,0.49712,0.00839,-0.04939,0.06846,0.78474
eohi_val,cal_true,1063,-0.0358,-0.0949,0.02654,0.24352,-0.0081,-0.07342,0.05105,0.79199
eohi_pref,cal_15,1063,0.02312,-0.04369,0.0815,0.45148,0.00755,-0.05412,0.06986,0.80582
eohi_pref,cal_false,1063,0.02725,-0.03088,0.08754,0.37477,0.00736,-0.05157,0.06691,0.8106
eohiDGEN_pref,cal_false,1063,0.00873,-0.04608,0.05948,0.77614,0.00453,-0.05188,0.05691,0.88276
eohi_pref,cal_35,1063,0.02663,-0.03409,0.08569,0.38574,0.00424,-0.05505,0.06505,0.89016
eohiDGEN_life,cal_global,1063,0.02651,-0.03474,0.08515,0.38781,0.00414,-0.06008,0.06383,0.89275
eohiDGEN_val,cal_35,1063,0.0047,-0.04819,0.06294,0.87827,-0.00387,-0.06182,0.05569,0.89972
eohi_pers,cal_75,1063,-0.01501,-0.08009,0.05265,0.62493,-0.00292,-0.06324,0.06169,0.92424
eohi_life,cal_global,1063,0.01135,-0.04298,0.06703,0.71158,-0.00291,-0.0638,0.0565,0.92459
eohiDGEN_life,cal_35,1063,0.01256,-0.04397,0.071,0.68263,-0.00226,-0.06355,0.06168,0.94137
eohi_life,cal_55,1063,0.0154,-0.0459,0.07265,0.61609,0.00223,-0.05962,0.05937,0.94216
eohiDGEN_mean,cal_35,1063,-0.01765,-0.07148,0.0442,0.5654,-0.0016,-0.05665,0.05409,0.95856
eohiDGEN_val,cal_75,1063,0.02365,-0.04062,0.08878,0.44107,-0.00147,-0.06785,0.06103,0.96186
eohi_pref,cal_true,1063,-0.00826,-0.06273,0.05423,0.78791,0.00098,-0.05588,0.06502,0.97445
1 eohi_var cal_var n pearson_r pearson_ci_lower pearson_ci_upper pearson_p spearman_rho spearman_ci_lower spearman_ci_upper spearman_p
1 eohi_var cal_var n pearson_r pearson_ci_lower pearson_ci_upper pearson_p spearman_rho spearman_ci_lower spearman_ci_upper spearman_p
2 eohi_mean cal_selfActual 1063 -0.09447 -0.15019 -0.0362 0.00205 -0.11288 -0.1699 -0.05351 0.00023
3 eohi_val cal_selfActual 1063 -0.09384 -0.15347 -0.03616 0.00219 -0.09963 -0.16267 -0.04046 0.00114
4 eohi_pers cal_selfActual 1063 -0.07652 -0.12898 -0.01613 0.01258 -0.08281 -0.14357 -0.02535 0.00691
5 eohiDGEN_pref cal_15 1063 0.11449 0.04892 0.17239 0.00018 0.08071 0.01776 0.13995 0.00847
6 eohi_pers cal_55 1063 -0.06004 -0.1185 -0.0065 0.05036 -0.07699 -0.13816 -0.01519 0.01204
7 eohi_mean cal_55 1063 -0.06632 -0.11782 -0.01552 0.03061 -0.07588 -0.13634 -0.01868 0.01333
8 eohiDGEN_val cal_55 1063 -0.05006 -0.1073 0.00598 0.10281 -0.06964 -0.12753 -0.01077 0.02318
9 eohiDGEN_mean cal_true 1063 0.05754 -0.0009 0.11652 0.06074 0.0676 0.00725 0.12602 0.02754
10 eohiDGEN_life cal_75 1063 -0.04564 -0.10516 0.01314 0.13702 -0.05638 -0.11573 0.00538 0.06615
11 eohi_life cal_75 1063 -0.03875 -0.09577 0.01879 0.20683 -0.05565 -0.11564 0.00491 0.06974
12 eohiDGEN_mean cal_15 1063 0.07628 0.01598 0.13615 0.01285 0.05395 -0.00598 0.11493 0.0787
13 eohi_val cal_55 1063 -0.05666 -0.10898 -0.00182 0.06479 -0.05346 -0.10885 0.00265 0.0815
14 eohiDGEN_life cal_55 1063 0.05044 -0.00773 0.10614 0.10026 0.04956 -0.00878 0.11087 0.10635
15 eohiDGEN_pref cal_true 1063 0.07292 0.0095 0.134 0.01741 0.04803 -0.0094 0.108 0.11761
16 eohi_pref cal_selfActual 1063 -0.03281 -0.09762 0.032 0.28519 -0.04423 -0.10373 0.01867 0.14958
17 eohiDGEN_val cal_false 1063 -0.02787 -0.09239 0.03411 0.36398 -0.04393 -0.10451 0.01416 0.15236
18 eohiDGEN_pers cal_15 1063 0.04714 -0.01715 0.10944 0.12451 0.04308 -0.01746 0.10317 0.16046
19 eohiDGEN_life cal_false 1063 0.03476 -0.02508 0.09492 0.25753 0.04047 -0.02102 0.10411 0.18735
20 eohi_life cal_true 1063 -0.02269 -0.08192 0.0416 0.45982 -0.03779 -0.0985 0.01964 0.21833
21 eohiDGEN_life cal_15 1063 0.05091 -0.01796 0.11623 0.09709 0.03755 -0.02524 0.09835 0.22121
22 eohiDGEN_life cal_true 1063 -0.00526 -0.06612 0.05833 0.86395 -0.03744 -0.09633 0.02643 0.22263
23 eohiDGEN_life cal_selfActual 1063 0.02779 -0.03579 0.0868 0.36538 0.03688 -0.02625 0.10029 0.22962
24 eohi_val cal_75 1063 -0.03432 -0.09303 0.02511 0.26362 -0.03548 -0.09664 0.02788 0.24775
25 eohiDGEN_val cal_selfActual 1063 0.02114 -0.04078 0.08546 0.49105 0.03547 -0.02636 0.09663 0.24789
26 eohiDGEN_pers cal_true 1063 0.02354 -0.03617 0.08422 0.44334 0.03532 -0.02544 0.09158 0.24994
27 eohiDGEN_pers cal_35 1063 -0.03495 -0.09252 0.02312 0.25493 -0.03489 -0.09183 0.02535 0.25578
28 eohiDGEN_pers cal_75 1063 -0.01611 -0.07516 0.04168 0.59971 -0.03379 -0.08928 0.02687 0.271
29 eohiDGEN_pers cal_false 1063 -0.02503 -0.08151 0.03334 0.4149 -0.03266 -0.08708 0.02624 0.28745
30 eohiDGEN_val cal_global 1063 -0.00447 -0.05803 0.05482 0.8843 -0.03204 -0.08825 0.0252 0.29666
31 eohiDGEN_mean cal_55 1063 -0.03699 -0.08943 0.01668 0.2282 -0.02929 -0.08473 0.03097 0.34006
32 eohiDGEN_pers cal_selfActual 1063 -0.03827 -0.10619 0.02517 0.21254 -0.02835 -0.09145 0.03191 0.35572
33 eohi_life cal_selfActual 1063 0.04737 -0.01652 0.10847 0.12274 0.02825 -0.03176 0.08666 0.35753
34 eohi_mean cal_75 1063 -0.01187 -0.06715 0.04855 0.69916 -0.0275 -0.0859 0.03279 0.37045
35 eohiDGEN_val cal_15 1063 0.00914 -0.04676 0.06618 0.76607 -0.02707 -0.08456 0.02837 0.37789
36 eohi_pref cal_55 1063 -0.02632 -0.08106 0.02643 0.39136 -0.02524 -0.08705 0.03558 0.41101
37 eohiDGEN_pref cal_35 1063 -0.00852 -0.0681 0.0469 0.78147 -0.02501 -0.08136 0.03028 0.41525
38 eohi_val cal_global 1063 -0.03245 -0.08628 0.02097 0.29049 -0.02445 -0.0837 0.03405 0.42589
39 eohiDGEN_pref cal_global 1063 0.05135 -0.00854 0.10781 0.09423 0.02438 -0.03698 0.07747 0.42716
40 eohi_mean cal_global 1063 -0.00987 -0.0669 0.04625 0.74782 -0.02423 -0.08093 0.03538 0.43002
41 eohi_mean cal_false 1063 0.00103 -0.05609 0.05796 0.97329 -0.02413 -0.08528 0.034 0.43184
42 eohi_pers cal_15 1063 0.0424 -0.01973 0.10006 0.16717 0.02377 -0.03499 0.07967 0.4388
43 eohiDGEN_pref cal_75 1063 0.04194 -0.01865 0.10462 0.17182 0.02253 -0.03687 0.08394 0.46315
44 eohi_val cal_35 1063 0.00919 -0.04488 0.06395 0.76478 -0.02132 -0.07724 0.0359 0.48748
45 eohiDGEN_mean cal_75 1063 0.02182 -0.03779 0.08058 0.47734 0.02086 -0.0387 0.08009 0.49684
46 eohi_val cal_15 1063 -0.0114 -0.06675 0.04342 0.71036 0.02045 -0.0384 0.08265 0.50533
47 eohiDGEN_mean cal_global 1063 0.01728 -0.04015 0.07341 0.5736 0.01974 -0.03874 0.07654 0.52029
48 eohiDGEN_val cal_true 1063 0.03211 -0.02651 0.08716 0.29554 0.01936 -0.04555 0.08179 0.52834
49 eohi_mean cal_15 1063 0.02437 -0.03634 0.08546 0.42732 0.01772 -0.03986 0.07648 0.56377
50 eohiDGEN_pers cal_55 1063 -0.02109 -0.07397 0.03478 0.49211 -0.0175 -0.0759 0.04546 0.56869
51 eohiDGEN_mean cal_selfActual 1063 0.00131 -0.05794 0.06413 0.96594 0.0169 -0.04525 0.07532 0.582
52 eohi_life cal_false 1063 0.02929 -0.02351 0.0794 0.34004 0.01689 -0.04339 0.07467 0.58238
53 eohiDGEN_mean cal_false 1063 -0.02032 -0.07402 0.03873 0.5082 -0.01576 -0.07583 0.04145 0.60773
54 eohiDGEN_pref cal_55 1063 -0.01025 -0.064 0.04642 0.73848 -0.01542 -0.0708 0.046 0.61559
55 eohi_life cal_35 1063 0.03599 -0.02203 0.08892 0.24108 0.01538 -0.04474 0.07414 0.61636
56 eohi_pers cal_false 1063 -0.00935 -0.06513 0.04644 0.76086 -0.01485 -0.07336 0.04336 0.62859
57 eohi_life cal_15 1063 0.0156 -0.0452 0.07483 0.61145 0.01432 -0.04315 0.07438 0.64092
58 eohi_pers cal_true 1063 0.0055 -0.05876 0.07464 0.85794 -0.01291 -0.06976 0.04539 0.67419
59 eohi_pref cal_75 1063 0.02725 -0.03153 0.08793 0.37474 0.01264 -0.0447 0.07576 0.68049
60 eohi_mean cal_true 1063 -0.01786 -0.07676 0.04265 0.56079 -0.01207 -0.07091 0.05063 0.69435
61 eohi_val cal_false 1063 -0.01276 -0.07529 0.04177 0.6777 -0.0115 -0.0741 0.04742 0.7081
62 eohi_pers cal_35 1063 0.01428 -0.04037 0.07111 0.6418 0.01092 -0.04486 0.06885 0.7222
63 eohiDGEN_pers cal_global 1063 -0.00723 -0.06312 0.05198 0.81387 -0.01039 -0.06483 0.05266 0.73501
64 eohi_mean cal_35 1063 0.02233 -0.03257 0.08046 0.46697 -0.01029 -0.06915 0.04512 0.73748
65 eohi_pref cal_global 1063 0.0183 -0.03841 0.07643 0.55118 0.00972 -0.0475 0.07007 0.75165
66 eohi_pers cal_global 1063 -0.00467 -0.06118 0.05112 0.87911 -0.00916 -0.06635 0.05143 0.76546
67 eohiDGEN_pref cal_selfActual 1063 0.02085 -0.03875 0.07863 0.49712 0.00839 -0.04939 0.06846 0.78474
68 eohi_val cal_true 1063 -0.0358 -0.0949 0.02654 0.24352 -0.0081 -0.07342 0.05105 0.79199
69 eohi_pref cal_15 1063 0.02312 -0.04369 0.0815 0.45148 0.00755 -0.05412 0.06986 0.80582
70 eohi_pref cal_false 1063 0.02725 -0.03088 0.08754 0.37477 0.00736 -0.05157 0.06691 0.8106
71 eohiDGEN_pref cal_false 1063 0.00873 -0.04608 0.05948 0.77614 0.00453 -0.05188 0.05691 0.88276
72 eohi_pref cal_35 1063 0.02663 -0.03409 0.08569 0.38574 0.00424 -0.05505 0.06505 0.89016
73 eohiDGEN_life cal_global 1063 0.02651 -0.03474 0.08515 0.38781 0.00414 -0.06008 0.06383 0.89275
74 eohiDGEN_val cal_35 1063 0.0047 -0.04819 0.06294 0.87827 -0.00387 -0.06182 0.05569 0.89972
75 eohi_pers cal_75 1063 -0.01501 -0.08009 0.05265 0.62493 -0.00292 -0.06324 0.06169 0.92424
76 eohi_life cal_global 1063 0.01135 -0.04298 0.06703 0.71158 -0.00291 -0.0638 0.0565 0.92459
77 eohiDGEN_life cal_35 1063 0.01256 -0.04397 0.071 0.68263 -0.00226 -0.06355 0.06168 0.94137
78 eohi_life cal_55 1063 0.0154 -0.0459 0.07265 0.61609 0.00223 -0.05962 0.05937 0.94216
79 eohiDGEN_mean cal_35 1063 -0.01765 -0.07148 0.0442 0.5654 -0.0016 -0.05665 0.05409 0.95856
80 eohiDGEN_val cal_75 1063 0.02365 -0.04062 0.08878 0.44107 -0.00147 -0.06785 0.06103 0.96186
81 eohi_pref cal_true 1063 -0.00826 -0.06273 0.05423 0.78791 0.00098 -0.05588 0.06502 0.97445

BIN
eohi1/eohi_process.xlsm Normal file

Binary file not shown.

1073
eohi1/exp1.csv Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,33 @@
variable,accuracy,TF,n_total,n_correct,prop_correct,match_difficulty,Table A1,Table A2,match_difficulty,,,,,,,
gest_75_T,75,T,1063,952,0.89558,NO,0.75,0.9,M,,,,,,,
dors_55_T,55,T,1063,949,0.89276,NO,0.64,0.96,M,,,,,,,
chur_75_T,75,T,1063,947,0.89087,NO,0.83,0.9,M,,,,,,,
mons_55_T,55,T,1063,944,0.88805,NO,0.55,0.82,H,,,,,,,
lock_35_T,35,T,1063,923,0.8683,NO,0.45,0.9,M,,,,,,,
hume_35_F,35,F,1063,873,0.82126,NO,0.43,0.94,M,,,,,,,
papy_75_T,75,T,1063,873,0.82126,YES,0.78,0.92,M,,,,,,,
sham_55_T,55,T,1063,860,0.80903,NO,0.57,1,M,,,,,,,0.665565
list_75_F,75,F,1063,855,0.80433,YES,0.85,0.96,L,,,,,,,
cons_55_T,55,T,1063,816,0.76764,NO,0.57,0.9,M,,,,,,,
tsun_75_T,75,T,1063,813,0.76482,YES,0.8,0.96,L,,,,,,,
pana_35_T,35,T,1063,805,0.75729,NO,0.37,0.65,H,,,,,,,
kabu_15_T,15,T,1063,791,0.74412,NO,0.25,0.76,M,,,,,,,
gulf_15_F,15,F,1063,773,0.72719,NO,0.16,0.24,H,,,,,,,
oedi_35_T,35,T,1063,767,0.72154,NO,0.43,0.88,M,,,,,,,
vaud_15_T,15,T,1063,766,0.7206,NO,0.24,0.41,H,,,,,,,
mont_35_F,35,F,1063,748,0.70367,NO,0.36,0.78,M,,,,,,,
demo_15_F,15,F,1063,727,0.68391,NO,0.19,0.39,H,,,,,,,
spee_75_F,75,F,1063,688,0.64722,NO,0.78,0.98,L,,,,,,,
dwar_55_F,55,F,1063,663,0.62371,YES,0.64,0.76,L,,,,,,,
carb_35_T,35,T,1063,629,0.59172,NO,0.39,0.31,H,,,,,,,
bohr_15_T,15,T,1063,626,0.5889,NO,0.16,0.37,H,,,,,,,
gang_15_F,15,F,1063,572,0.5381,NO,0.21,0.39,H,,,,,,,
vitc_55_F,55,F,1063,556,0.52305,NO,0.63,0.84,L,,,,,,,
hert_35_F,35,F,1063,551,0.51834,NO,0.41,0.65,M,,,,,,,
pucc_15_F,15,F,1063,543,0.51082,NO,0.15,0.22,H,,,,,,,
troy_15_T,15,T,1063,504,0.47413,NO,0.22,0.49,M,,,,,,,
moza_55_F,55,F,1063,500,0.47037,NO,0.59,0.71,L,,,,,,,
croc_75_F,75,F,1063,433,0.40734,NO,0.76,0.88,L,,,,,,,
gees_55_F,55,F,1063,324,0.3048,NO,0.57,0.84,L,,,,,,,
lute_35_F,35,F,1063,322,0.30292,NO,0.46,0.67,L,,,,,,,
memo_75_F,75,F,1063,265,0.24929,NO,0.83,0.88,L,,,,,,,
1 variable accuracy TF n_total n_correct prop_correct match_difficulty Table A1 Table A2 match_difficulty
1 variable accuracy TF n_total n_correct prop_correct match_difficulty Table A1 Table A2 match_difficulty
2 gest_75_T 75 T 1063 952 0.89558 NO 0.75 0.9 M
3 dors_55_T 55 T 1063 949 0.89276 NO 0.64 0.96 M
4 chur_75_T 75 T 1063 947 0.89087 NO 0.83 0.9 M
5 mons_55_T 55 T 1063 944 0.88805 NO 0.55 0.82 H
6 lock_35_T 35 T 1063 923 0.8683 NO 0.45 0.9 M
7 hume_35_F 35 F 1063 873 0.82126 NO 0.43 0.94 M
8 papy_75_T 75 T 1063 873 0.82126 YES 0.78 0.92 M
9 sham_55_T 55 T 1063 860 0.80903 NO 0.57 1 M 0.665565
10 list_75_F 75 F 1063 855 0.80433 YES 0.85 0.96 L
11 cons_55_T 55 T 1063 816 0.76764 NO 0.57 0.9 M
12 tsun_75_T 75 T 1063 813 0.76482 YES 0.8 0.96 L
13 pana_35_T 35 T 1063 805 0.75729 NO 0.37 0.65 H
14 kabu_15_T 15 T 1063 791 0.74412 NO 0.25 0.76 M
15 gulf_15_F 15 F 1063 773 0.72719 NO 0.16 0.24 H
16 oedi_35_T 35 T 1063 767 0.72154 NO 0.43 0.88 M
17 vaud_15_T 15 T 1063 766 0.7206 NO 0.24 0.41 H
18 mont_35_F 35 F 1063 748 0.70367 NO 0.36 0.78 M
19 demo_15_F 15 F 1063 727 0.68391 NO 0.19 0.39 H
20 spee_75_F 75 F 1063 688 0.64722 NO 0.78 0.98 L
21 dwar_55_F 55 F 1063 663 0.62371 YES 0.64 0.76 L
22 carb_35_T 35 T 1063 629 0.59172 NO 0.39 0.31 H
23 bohr_15_T 15 T 1063 626 0.5889 NO 0.16 0.37 H
24 gang_15_F 15 F 1063 572 0.5381 NO 0.21 0.39 H
25 vitc_55_F 55 F 1063 556 0.52305 NO 0.63 0.84 L
26 hert_35_F 35 F 1063 551 0.51834 NO 0.41 0.65 M
27 pucc_15_F 15 F 1063 543 0.51082 NO 0.15 0.22 H
28 troy_15_T 15 T 1063 504 0.47413 NO 0.22 0.49 M
29 moza_55_F 55 F 1063 500 0.47037 NO 0.59 0.71 L
30 croc_75_F 75 F 1063 433 0.40734 NO 0.76 0.88 L
31 gees_55_F 55 F 1063 324 0.3048 NO 0.57 0.84 L
32 lute_35_F 35 F 1063 322 0.30292 NO 0.46 0.67 L
33 memo_75_F 75 F 1063 265 0.24929 NO 0.83 0.88 L

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

BIN
eohi1/linearity_plots.pdf Normal file

Binary file not shown.

875
eohi1/mixed anova - DGEN.r Normal file
View File

@ -0,0 +1,875 @@
# Mixed ANOVA Analysis for DGEN Variables
# EOHI Experiment Data Analysis - DGEN Level Analysis
# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN
# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): hard-coded absolute path; the script only runs as-is on this
# one machine. A project-relative path would make it portable.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Verify the specific variables we need
required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
# NOTE(review): this only prints a warning and continues; if a column really
# is missing, the select(all_of(required_vars)) below will error anyway.
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
}
# Define domain mapping
# One row per measured column: maps each variable name to its TIME level
# (Past/Future) and its DOMAIN level (Preferences/Personality/Values/Life).
domain_mapping <- data.frame(
variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
# Efficient data pivoting using pivot_longer
# Reshape wide -> long: one row per participant x variable, value in DGEN_SCORE.
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "DGEN_SCORE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>%
filter(!is.na(DGEN_SCORE))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and DOMAIN
# (all statistics rounded to 5 decimal places for display)
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TIME and DOMAIN:")
print(desc_stats)
# Descriptive statistics by between-subjects factors
# Same summary, additionally split by the between-subjects factor TEMPORAL_DO.
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
# NOTE(review): long_data was already filtered to !is.na(DGEN_SCORE) above, so
# this is a no-op copy kept for safety.
long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
# 1. Missing values check
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(DGEN_SCORE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print("Missing values by TIME and DOMAIN:")
print(missing_summary)
# 2. Outlier detection
# Flags values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] per TIME x DOMAIN cell.
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(DGEN_SCORE),
sd = sd(DGEN_SCORE),
q1 = quantile(DGEN_SCORE, 0.25),
q3 = quantile(DGEN_SCORE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
.groups = 'drop'
)
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (streamlined)
# NOTE(review): ad.test() is evaluated twice per group (once for the statistic,
# once for the p-value); harmless but redundant work.
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
.groups = 'drop'
)
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each DOMAIN
# NOTE(review): leveneTest(DGEN_SCORE ~ TIME) inside summarise() relies on
# dplyr's data masking to resolve the formula variables; this works with the
# current dplyr/car versions but is fragile -- verify after package upgrades.
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across TIME within each DOMAIN:")
print(homogeneity_time)
# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across DOMAIN within each TIME:")
print(homogeneity_domain)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Hartley's F-max statistic: the ratio of the largest to the smallest value in
# a vector of group variances. NAs are dropped before taking the extremes.
calculate_hartley_ratio <- function(variances) {
  extremes <- range(variances, na.rm = TRUE)
  extremes[2] / extremes[1]
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
# NOTE(review): the TEMPORAL_DO level labels ("01PAST", "02FUT") are
# hard-coded; if the factor levels ever change, these become NA silently.
past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# More efficient bootstrap function for Hartley's F-max test
#
# Computes the observed F-max ratio (max/min group variance) of `response_var`
# across the levels of `group_var`, then bootstraps within each group
# (resampling with replacement, preserving group sizes) to build a null
# distribution of F-max ratios and takes its 95th percentile as the critical
# value.
#
# Args:
#   data:         data frame/tibble containing group_var and response_var
#   group_var:    name (string) of the grouping column
#   response_var: name (string) of the numeric response column
#   n_iter:       number of bootstrap iterations (default 1000)
# Returns: list(observed_ratio, critical_95, n_valid_iterations)
# NOTE: result depends on RNG state; callers should set.seed() beforehand.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
# Get unique groups and their sample sizes
groups <- unique(data[[group_var]])
# Calculate observed variances for each group
observed_vars <- data %>%
dplyr::group_by(!!rlang::sym(group_var)) %>%
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
dplyr::pull(var)
# Handle invalid variances
# Zero/NA variances are replaced by a tiny positive constant so the max/min
# ratio stays finite instead of dividing by zero.
if(any(observed_vars <= 0 | is.na(observed_vars))) {
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
}
# Calculate observed F-max ratio
observed_ratio <- max(observed_vars) / min(observed_vars)
# Pre-allocate storage for bootstrap ratios
bootstrap_ratios <- numeric(n_iter)
# Get group data once
# Each element holds the non-NA response values of one group.
# NOTE(review): if `data` is a tibble the single-column subset below stays a
# tibble until the is.na() (matrix-style) filter collapses it to a vector --
# verify this still holds if the caller's input type changes.
group_data_list <- map(groups, ~ {
group_data <- data[data[[group_var]] == .x, response_var]
group_data[!is.na(group_data)]
})
# Bootstrap with pre-allocated storage
for(i in 1:n_iter) {
# Bootstrap sample from each group independently
sample_vars <- map_dbl(group_data_list, ~ {
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
var(bootstrap_sample, na.rm = TRUE)
})
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
}
# Remove invalid ratios
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
if(length(valid_ratios) == 0) {
stop("No valid bootstrap ratios generated")
}
# Calculate critical value (95th percentile)
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
# Return only essential information
return(list(
observed_ratio = observed_ratio,
critical_95 = critical_95,
n_valid_iterations = length(valid_ratios)
))
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
print("=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# pick() hands just the two needed columns of the current group to the
# bootstrap helper; the list() wrapper stores the result as a list-column.
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
.groups = 'drop'
) %>%
mutate(
# Unpack the list-column into plain numeric columns for display.
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check if design is balanced
# The 3-way table counts observations per pID x TIME x DOMAIN cell; a balanced
# repeated-measures design has exactly 0 or 1 per cell.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
# =============================================================================
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
# =============================================================================
print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 requests Type-III sums of squares (pairs with the contr.sum option
# set at the top of the script).
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = DGEN_SCORE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
# Show Mauchly's test for sphericity
print("Mauchly's Test of Sphericity:")
print(mixed_anova_model$Mauchly)
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
print(mixed_anova_model$`Sphericity Corrections`)
# Extract and display corrected degrees of freedom
cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
anova_table <- mixed_anova_model$ANOVA
# Corrected df = original df multiplied by the corresponding epsilon
# (GGe for Greenhouse-Geisser, HFe for Huynh-Feldt).
corrected_df <- data.frame(
Effect = sphericity_corr$Effect,
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
GG_epsilon = sphericity_corr$GGe,
HF_epsilon = sphericity_corr$HFe
)
print(corrected_df)
cat("\n=== CORRECTED F-TESTS ===\n")
# The F value itself is unchanged by the corrections; only the df and the
# p-value differ between the original, GG, and HF rows.
for(i in seq_len(nrow(corrected_df))) {
effect <- corrected_df$Effect[i]
f_value <- anova_table$F[match(effect, anova_table$Effect)]
cat(sprintf("\n%s:\n", effect))
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
}
} else {
# NOTE(review): print() renders "\n" as the literal characters backslash-n;
# cat() would emit a real newline (same gotcha for the other print("\n...")
# calls in this script).
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
}
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Extract generalized eta squared from ezANOVA (already calculated)
effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
effect_sizes$ges <- round(effect_sizes$ges, 5)
print("Generalized Eta Squared:")
print(effect_sizes)
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")
# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
# The Error(pID/(TIME * DOMAIN)) term re-specifies the same mixed design:
# TIME and DOMAIN vary within participant, TEMPORAL_DO between.
aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
data = long_data_clean)
# Main effect of TIME
print("Main Effect of TIME:")
time_emmeans <- emmeans(aov_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
# All pairwise contrasts below use a Bonferroni adjustment.
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of DOMAIN
print("\nMain Effect of DOMAIN:")
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print("Estimated Marginal Means:")
print(domain_emmeans)
print("\nPairwise Contrasts:")
domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
print(domain_contrasts)
# Main effect of TEMPORAL_DO
print("\nMain Effect of TEMPORAL_DO:")
temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
print(temporal_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# For each two-way interaction: estimated marginal means for the cell grid,
# then simple effects of one factor conditioned on (by=) the other.
# TEMPORAL_DO × TIME Interaction
print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
print("Estimated Marginal Means:")
print(temporal_time_emmeans)
print("\nSimple Effects of TIME within each TEMPORAL_DO:")
temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_time_simple)
print("\nSimple Effects of TEMPORAL_DO within each TIME:")
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
print(temporal_time_simple2)
# TIME × DOMAIN Interaction
print("\n=== TIME × DOMAIN INTERACTION ===")
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
print("Estimated Marginal Means:")
print(time_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TIME:")
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
print(time_domain_simple)
print("\nSimple Effects of TIME within each DOMAIN:")
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(time_domain_simple2)
# TEMPORAL_DO × DOMAIN Interaction
print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
print("Estimated Marginal Means:")
print(temporal_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_domain_simple)
print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(temporal_domain_simple2)
# =============================================================================
# THREE-WAY INTERACTION ANALYSIS
# =============================================================================
print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
print("Estimated Marginal Means:")
print(three_way_emmeans)
print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:")
three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
print(three_way_contrasts)
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d calculations (library already loaded)
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
#
# Filters an emmeans pairwise-contrast data frame to rows with p < .05, then,
# for each significant contrast, pulls the matching raw observations out of
# `data` and reports Cohen's d (effsize::cohen.d) with sample sizes and the
# contrast's p-value.
#
# Args:
#   pairs_df:     as.data.frame() of an emmeans pairs() result; must contain
#                 columns `contrast` and `p.value`
#   data:         raw long-format data frame
#   group1_var:   column whose levels appear in the contrast string
#   group2_var:   conditioning (by=) column, if present as a pairs_df column
#   response_var: numeric response column
# Side effects: prints to the console; no meaningful return value.
# NOTE(review): the contrast label is split on " - ", so parsing breaks if a
# factor level itself contains " - ".
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast
# e.g. "Past - Future" -> level1 = "Past", level2 = "Future"
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions
# If the conditioning column is present, subset within its level too.
if(group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
# =============================================================================
# 1. TEMPORAL_DO × TIME INTERACTION
# =============================================================================
print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
# Get simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE")
# Get simple effects of TEMPORAL_DO within each TIME
temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE")
# =============================================================================
# 2. TIME × DOMAIN INTERACTION
# =============================================================================
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Get simple effects of TIME within each DOMAIN
time_domain_simple2_df <- as.data.frame(time_domain_simple2)
calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
# Get simple effects of DOMAIN within each TIME
time_domain_simple_df <- as.data.frame(time_domain_simple)
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
# =============================================================================
# 3. TEMPORAL_DO × DOMAIN INTERACTION
# =============================================================================
print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
# Get simple effects of TEMPORAL_DO within each DOMAIN
temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
# Get simple effects of DOMAIN within each TEMPORAL_DO
temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
# =============================================================================
# 4. THREE-WAY INTERACTION COHEN'S D
# =============================================================================
# Unlike sections 1-3, this section inlines the Cohen's d logic because the
# three-way contrasts are conditioned on two factors at once.
print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
# Get pairwise comparisons for the three-way interaction
three_way_contrasts_df <- as.data.frame(three_way_contrasts)
print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
print(three_way_contrasts_df)
# Calculate Cohen's d for significant three-way interaction effects
print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
# Extract significant comparisons (p < 0.05)
significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
if(nrow(significant_three_way) > 0) {
for(i in seq_len(nrow(significant_three_way))) {
comparison <- significant_three_way[i, ]
# Extract the grouping variables
temporal_do_level <- as.character(comparison$TEMPORAL_DO)
domain_level <- as.character(comparison$DOMAIN)
# Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
past_data <- long_data_clean$DGEN_SCORE[
long_data_clean$TEMPORAL_DO == temporal_do_level &
long_data_clean$DOMAIN == domain_level &
long_data_clean$TIME == "Past"
]
future_data <- long_data_clean$DGEN_SCORE[
long_data_clean$TEMPORAL_DO == temporal_do_level &
long_data_clean$DOMAIN == domain_level &
long_data_clean$TIME == "Future"
]
if(length(past_data) > 0 && length(future_data) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(past_data, future_data)
cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
cat(sprintf(" Past vs Future comparison\n"))
cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
cat("\n")
}
}
} else {
cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
}
# =============================================================================
# INTERACTION PLOTS
# =============================================================================
print("=== INTERACTION PLOTS ===")
# Define color palette for DOMAIN (4 levels)
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
"Values" = "#FFB000", "Life" = "#FE6100")
# TEMPORAL_DO × DOMAIN INTERACTION PLOT
# Create estimated marginal means for TEMPORAL_DO × DOMAIN
emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
# Prepare emmeans data frame
# (rename emmeans columns to plotting-friendly names and fix factor order)
emmeans_temporal_domain <- emm_temporal_domain %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
)
# Prepare raw data for plotting
iPlot_temporal_domain <- long_data_clean %>%
dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
)
# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
# Convert to numeric x-axis and add position offsets for dodging
# Manual dodging: each DOMAIN level (1..4) gets an offset of
# (level - 2.5) * dodge_width/4, i.e. the four violins are centered
# symmetrically around each TEMPORAL_DO x-position.
dodge_width <- 0.6
iPlot_temporal_domain <- iPlot_temporal_domain %>%
mutate(
x_pos = as.numeric(TEMPORAL_DO),
domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
x_dodged = x_pos + domain_offset
)
emmeans_temporal_domain <- emmeans_temporal_domain %>%
mutate(
x_pos = as.numeric(TEMPORAL_DO),
domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
x_dodged = x_pos + domain_offset
)
interaction_plot_temporal_domain <- ggplot() +
# Distribution layer - violins (completely separated)
# scale = "width" gives every violin the same maximum width.
geom_violin(
data = iPlot_temporal_domain,
aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)),
alpha = 0.4,
color = NA,
trim = FALSE,
scale = "width",
width = dodge_width / 4
) +
# Emmeans error bars
geom_errorbar(
data = emmeans_temporal_domain,
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
width = 0.08,
linewidth = 0.8,
color = "black"
) +
# Emmeans points
# Shapes 21-24 are fillable, so fill carries the domain color while the
# outline stays black.
geom_point(
data = emmeans_temporal_domain,
aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
size = 4,
stroke = 1,
color = "black"
) +
labs(
x = "Order",
y = "Mean absolute difference from present",
title = "TEMPORAL_DO × DOMAIN Interaction"
) +
scale_x_continuous(
breaks = c(1, 2),
labels = c("Past First", "Future First"),
limits = c(0.4, 2.6)
) +
scale_y_continuous(
limits = c(0, 10),
breaks = seq(0, 10, 2)
) +
scale_color_manual(name = "Domain", values = domain_colors) +
scale_fill_manual(name = "Domain", values = domain_colors) +
scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(interaction_plot_temporal_domain)
# =============================================================================
# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION
# =============================================================================
# Same cell means as the previous plot but without the raw-data violins, so a
# narrower y-range (3-6) is used below to zoom in on the means.
# Create fresh emmeans data for emmeans-only plot
emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
x_pos = as.numeric(TEMPORAL_DO),
domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
x_dodged = x_pos + domain_offset
)
# Create emmeans-only plot
interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) +
geom_errorbar(
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN),
width = 0.1,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Order",
y = "Absolute difference from the present",
title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)"
) +
scale_x_continuous(
breaks = c(1, 2),
labels = c("Past First", "Future First"),
limits = c(0.4, 2.6)
) +
scale_y_continuous(
limits = c(3, 6),
breaks = seq(0, 10, 1)
) +
scale_color_manual(name = "Domain", values = domain_colors) +
scale_fill_manual(name = "Domain", values = domain_colors) +
scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(interaction_plot_emmeans_only)
# =============================================================================
# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only)
# =============================================================================
# Prepare emmeans data frame for TIME main effect
# (reuses time_emmeans computed in the post-hoc section above)
time_main_emm_df <- time_emmeans %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TIME = factor(TIME, levels = c("Past", "Future"))
)
# Define color palette for TIME
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
# Create TIME main-effect plot (style aligned with existing emmeans-only plot)
time_main_plot <- ggplot(time_main_emm_df) +
geom_errorbar(
aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.15,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Time",
y = "Absolute difference from the present",
title = "Main Effect of TIME (Estimated Marginal Means)"
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(time_main_plot)

View File

@ -0,0 +1,769 @@
# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Dependent variables (8 columns = 4 domains x 2 temporal directions):
#   NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
#   NFut_mean_pref,  NFut_mean_pers,  NFut_mean_val,  NFut_mean_life
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
library(ggplot2) # For plotting
# Disable scientific notation in printed output
options(scipen = 999)
# Sum-to-zero contrasts: required for interpretable Type-III ANOVA tests below
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): hard-coded absolute path — will fail on any other machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping: each wide column name -> (time level, domain level)
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
# Reshape wide -> long: one row per participant x TIME x DOMAIN observation
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Overall descriptive statistics by TIME and DOMAIN (rounded to 5 dp for display)
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
# Descriptive statistics by between-subjects factors (presentation-order group)
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
# ASSUMPTION TESTING
# 1. Missing values check
# NOTE(review): long_data was already filtered with !is.na(MEAN_DIFFERENCE)
# above, so n_missing is 0 by construction here — this only documents counts.
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
# Create clean dataset (long_data is already filtered for NA values)
long_data_clean <- long_data
# 2. Outlier detection
# IQR rule: flag values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] per TIME x DOMAIN cell
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
# 3. Anderson-Darling normality test per TIME x DOMAIN cell.
# Fit ad.test() once per cell and keep the fitted object in a list-column
# (the original refit the same test twice to extract the statistic and the
# p-value separately). Output columns are unchanged.
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad_fit = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad_fit, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad_fit, ~ .x$p.value)
  ) %>%
  select(TIME, DOMAIN, n, ad_statistic, ad_p_value)
print(normality_results)
# 4. Homogeneity of variance (Levene's test), fitted once per group
# (the original refit each test twice to pull out F and p separately).
# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    lev_fit = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(lev_fit, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev_fit, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(DOMAIN, levene_F, levene_p)
print(homogeneity_time)
# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    lev_fit = list(leveneTest(MEAN_DIFFERENCE ~ DOMAIN)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(lev_fit, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev_fit, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(TIME, levene_F, levene_p)
print(homogeneity_domain)
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# Hartley's F-max statistic: ratio of the largest to the smallest variance.
# NAs in `variances` are ignored.
calculate_hartley_ratio <- function(variances) {
  spread <- range(variances, na.rm = TRUE)
  spread[2] / spread[1]
}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
# Sanity-check the factor coding before subsetting on its literal levels below
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
# NOTE(review): no na.rm here — if either group variance is NA (e.g. an
# empty cell), f_max_ratio becomes NA. Confirm that is acceptable.
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Bootstrap critical value for Hartley's F-max test.
#
# Args:
#   data:         data frame or tibble holding the grouping and response columns
#   group_var:    name of the grouping column (character scalar)
#   response_var: name of the numeric response column (character scalar)
#   n_iter:       number of bootstrap iterations (default 1000)
#
# Returns a list: observed_ratio (F-max of the observed group variances),
# critical_95 (95th percentile of the bootstrap F-max distribution), and
# n_valid_iterations (bootstrap draws that produced a finite ratio).
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group levels present in the data
  groups <- unique(data[[group_var]])
  # Observed per-group variances
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)
  # Replace zero/NA variances with a tiny positive value so the ratio stays finite
  if (any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }
  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)
  # Extract each group's response values once, as a plain numeric vector.
  # (`data[[col]][rows]` works for both data.frames and tibbles; the original
  # `data[rows, col]` returns a one-column tibble — not a vector — when the
  # caller passes a tibble such as the output of dplyr::pick().)
  group_data_list <- map(groups, ~ {
    vals <- data[[response_var]][data[[group_var]] == .x]
    vals[!is.na(vals)]
  })
  # Bootstrap: resample each group independently with replacement
  for (i in 1:n_iter) {
    sample_vars <- map_dbl(group_data_list, ~ {
      # Resample indices rather than values: sample(x, ...) on a length-1
      # numeric x would sample from 1:x instead of returning x itself.
      idx <- sample.int(length(.x), replace = TRUE)
      var(.x[idx], na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Keep only finite ratios (Inf/NaN can arise from degenerate resamples)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility
# For each cell, compare the observed F-max ratio against its bootstrap 95th
# percentile; pick() (dplyr >= 1.1) hands the within-group columns to the helper.
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
# Heterogeneity flagged when the observed ratio exceeds the bootstrap cutoff
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# MIXED ANOVA ANALYSIS
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if design is balanced: each participant must contribute exactly ONE
# observation to every TIME × DOMAIN cell. A count of 0 means a missing cell,
# >1 means duplicated rows. (The original accepted counts of 0 — i.e. missing
# cells — and still reported the design as balanced.)
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance == 1)) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 requests Type-III sums of squares (pairs with the contr.sum option
# set at the top of the script); detailed = TRUE adds SS columns to the table.
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
# Show Mauchly's test for sphericity
print(mixed_anova_model$Mauchly)
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt).
# ezANOVA only returns this element when a within-subjects effect has more
# than 2 levels, hence the NULL guard.
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  # Hoist the effect lookup: one index vector instead of eight match() calls
  eff_idx <- match(sphericity_corr$Effect, anova_table$Effect)
  # Corrected df = original df scaled by the epsilon estimate (GG / HF)
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[eff_idx],
    Original_DFd = anova_table$DFd[eff_idx],
    GG_DFn = anova_table$DFn[eff_idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[eff_idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[eff_idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[eff_idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]
      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }
  # Within-subjects effects (sphericity corrections where applicable)
  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }
  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }
  # Effects needing correction: report original, GG- and HF-corrected statistics
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[eff_idx[i]]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
      corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
      corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
      corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # cat() renders "\n" as a newline; the original print() showed it literally
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
# COHEN'S D FOR MAIN EFFECTS
# Create aov model for emmeans; Error(pID/(TIME * DOMAIN)) declares the
# within-subject error strata matching the ezANOVA design above
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
data = long_data_clean)
# Main Effect of TIME: estimated marginal means and the Past - Future contrast
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Calculate Cohen's d for TIME main effect
# NOTE(review): cohen.d() here treats the Past and Future scores as two
# independent samples, but they come from the same participants — consider
# paired = TRUE (on participant-level means) for a within-subject effect size.
if(nrow(time_main_df) > 0) {
cat("\nCohen's d for TIME main effect:\n")
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
time_cohens_d <- cohen.d(time_past_data, time_future_data)
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
# Bonferroni-adjusted pairwise contrasts among the four domains
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
# (levels are recovered by splitting the emmeans contrast label on " - ")
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
cat("\nCohen's d for significant DOMAIN contrasts:\n")
for(i in seq_len(nrow(significant_domain))) {
contrast_name <- as.character(significant_domain$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
if(length(data1) > 0 && length(data2) > 0) {
# NOTE(review): unpaired cohen.d on repeated-measures data — see TIME note
domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name))
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
cat("\n")
}
}
}
}
# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS
# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO
# ("|" conditions the contrasts: Past vs Future separately per presentation order)
temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni")
print(temporal_time_simple)
# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME
time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME)
time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni")
print(time_domain_simple)
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# Function to calculate Cohen's d for pairwise comparisons
# Report Cohen's d (from raw data) for all significant rows of an emmeans
# pairwise-contrast table.
#
# pairs_df:     data.frame from pairs()/emmeans with columns `contrast` and
#               `p.value` (plus, for conditioned contrasts, a column named by
#               group2_var holding the conditioning level)
# data:         raw long-format data the contrasts were computed from
# group1_var:   column whose levels appear in the contrast strings ("A - B")
# group2_var:   conditioning column (used only if present in pairs_df)
# response_var: numeric response column
#
# Side effects only: prints the significant rows and a Cohen's d summary.
# NOTE(review): levels are parsed by splitting the contrast label on " - ";
# this breaks if a factor level itself contains " - ".
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast label into its two factor levels
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions, restricted to the conditioning level
# when this is a conditioned ("|") contrast table
if(group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package (unpaired)
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
# 1. TEMPORAL_DO × TIME INTERACTION
# Get simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# 2. TIME × DOMAIN INTERACTION
# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain)
time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain)
print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===")
print("Past vs Future contrasts for each domain:")
print(time_domain_simple_df)
# Calculate Cohen's d for Past vs Future contrasts within each domain.
# cat() renders the leading "\n" as a newline; the original print() calls
# showed the "\n" literally in the console.
cat("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===\n")
significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ]
if(nrow(significant_time_domain) > 0) {
  print("Significant Past vs Future contrasts within domains (p < 0.05):")
  print(significant_time_domain)
  cat("\nCohen's d calculations for Past vs Future within each domain:\n")
  # NOTE(review): this loop reports ALL domains (it iterates over
  # time_domain_simple_df), not only the significant ones listed above —
  # confirm that is intentional.
  for(i in seq_len(nrow(time_domain_simple_df))) {
    comparison <- time_domain_simple_df[i, ]
    domain_level <- as.character(comparison$DOMAIN)
    # Get data for Past and Future within this domain
    past_data <- long_data_clean$MEAN_DIFFERENCE[
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Past"
    ]
    future_data <- long_data_clean$MEAN_DIFFERENCE[
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Future"
    ]
    if(length(past_data) > 0 && length(future_data) > 0) {
      # Calculate Cohen's d using effsize package (unpaired)
      cohens_d_result <- cohen.d(past_data, future_data)
      cat(sprintf("Domain: %s\n", domain_level))
      cat(sprintf(" Past vs Future comparison\n"))
      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
      cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
      cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
      cat(sprintf(" p-value: %.5f\n", comparison$p.value))
      cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
      cat("\n")
    }
  }
} else {
  cat("No significant Past vs Future contrasts found within any domain.\n")
}
# INTERACTION PLOTS
# Define color palettes (colour-blind-friendly IBM palette)
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
# NOTE(review): domain_colors is defined but not referenced by any plot in
# this section — confirm it is used later or remove it.
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
"Values" = "#FFB000", "Life" = "#FE6100")
# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")
# ============================================================
# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
# ============================================================
# Create fresh emmeans data for Plot 3
emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
# Keep rows with complete estimate + CI; compute a manual dodge so the two
# TIME points sit side-by-side at each TEMPORAL_DO position (offset ±0.15)
emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
TIME = factor(TIME, levels = time_levels),
x_pos = as.numeric(TEMPORAL_DO),
time_offset = (as.numeric(TIME) - 1.5) * 0.3,
x_dodged = x_pos + time_offset
)
# Create simple emmeans-only plot (error bars under the points)
interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
geom_errorbar(
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.1,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Order",
y = "Mean absolute difference from present",
title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
) +
scale_x_continuous(
breaks = c(1, 2),
labels = c("Past First", "Future First"),
limits = c(0.5, 2.5)
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(interaction_plot_emmeans_only)
# ============================================================
# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis)
# ============================================================
# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans)
emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN)
# Prepare emmeans data frame for Plot 4 (complete estimate + CI rows only)
emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels)
)
# Prepare raw data for plotting with position offsets
# NOTE(review): iPlot_plot4 is built but never used by the emmeans-only plot
# below — confirm it is needed (e.g. for a raw-points overlay) or drop it.
dodge_width_plot4 <- 0.2
iPlot_plot4 <- long_data_clean %>%
dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels),
x_pos = as.numeric(DOMAIN),
time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
x_dodged = x_pos + time_offset
)
# Manual dodge: offset the two TIME points around each DOMAIN position
emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>%
mutate(
x_pos = as.numeric(DOMAIN),
time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
x_dodged = x_pos + time_offset
)
# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only
interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) +
geom_errorbar(
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.1,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Domain",
y = "Mean absolute difference from present",
title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means"
) +
scale_x_continuous(
breaks = c(1, 2, 3, 4),
labels = c("Preferences", "Personality", "Values", "Life"),
limits = c(0.5, 4.5)
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(interaction_plot_time_domain_plot4)
# ============================================================
# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only)
# ============================================================
# Prepare emmeans data frame for TIME main effect (complete estimate + CI only)
time_main_emm_df <- time_emmeans %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TIME = factor(TIME, levels = c("Past", "Future"))
)
# Create TIME main-effect plot (style aligned with existing emmeans-only plots;
# error bars drawn first so the emmeans points sit on top)
time_main_plot <- ggplot(time_main_emm_df) +
geom_errorbar(
aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.15,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Time",
y = "Mean absolute difference from present",
title = "Main Effect of TIME (Estimated Marginal Means)"
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(time_main_plot)

View File

@ -0,0 +1,765 @@
# Mixed ANOVA Analysis for Personality Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Dependent variables (10 columns = 5 personality items x 2 temporal directions):
#   NPastDiff_pers_{extravert,critical,dependable,anxious,complex}
#   NFutDiff_pers_{extravert,critical,dependable,anxious,complex}
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): hard-coded absolute path — will fail on any other machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Display basic information about the dataset.
# collapse = " x " (with spaces) so dimensions print as "378 x 10",
# not "378 x10" as the original " x" separator produced.
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$pID))))
# Verify the specific variables we need
required_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
"NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
print("All required personality item variables found!")
}
# Define item mapping: each wide column name -> (time level, item level)
item_mapping <- data.frame(
variable = c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
"NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex"),
time = c(rep("Past", 5), rep("Future", 5)),
item = rep(c("extravert", "critical", "dependable", "anxious", "complex"), 2),
stringsAsFactors = FALSE
)
# Item mapping created
# Efficient data pivoting using pivot_longer: one row per participant x TIME x ITEM
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(item_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# collapse = " x " so dimensions print as "3780 x 6", not "3780 x6"
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
print(paste("Number of participants:", length(unique(long_data$pID))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and ITEM (rounded to 5 dp for display)
desc_stats <- long_data %>%
group_by(TIME, ITEM) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)
# Descriptive statistics by between-subjects factors (presentation-order group)
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, ITEM) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing.
# NOTE(review): long_data was already filtered with !is.na(MEAN_DIFFERENCE)
# above, so this is a harmless no-op kept for safety.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
# collapse = " x " so dimensions print as "3780 x 6", not "3780 x6"
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
# 1. Missing values check per TIME x ITEM cell
missing_summary <- long_data %>%
group_by(TIME, ITEM) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print("Missing values by TIME and ITEM:")
print(missing_summary)
# 2. Outlier detection
# IQR rule: flag values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] per TIME x ITEM cell
outlier_summary <- long_data_clean %>%
group_by(TIME, ITEM) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test per TIME x ITEM cell.
# Fit ad.test() once per cell and keep the fitted object in a list-column
# (the original refit the same test twice to extract the statistic and the
# p-value separately). Output columns are unchanged.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_fit = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad_fit, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad_fit, ~ .x$p.value)
  ) %>%
  select(TIME, ITEM, n, ad_statistic, ad_p_value)
print("Anderson-Darling normality test results:")
# Round only the numeric columns for display
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test), fitted once per group
# (the original refit each test twice to pull out F and p separately).
# Test homogeneity across TIME within each ITEM
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    lev_fit = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(lev_fit, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev_fit, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(ITEM, levene_F, levene_p)
print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)
# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    lev_fit = list(leveneTest(MEAN_DIFFERENCE ~ ITEM)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(lev_fit, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev_fit, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(TIME, levene_F, levene_p)
print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Function to calculate Hartley's F-max ratio
# Hartley's F-max statistic: the largest group variance divided by the
# smallest, ignoring missing variances.
calculate_hartley_ratio <- function(variances) {
  spread <- range(variances, na.rm = TRUE)
  spread[2] / spread[1]
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × ITEM)
# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))
# NOTE(review): print() shows the characters "\n" literally; use cat() if an
# actual blank line before the banner is intended (pattern recurs below).
print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
# Hard-codes the two TEMPORAL_DO level labels "01PAST"/"02FUT", which the
# value check printed just above is meant to verify.
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Bootstrap critical value for Hartley's F-max homogeneity-of-variance test.
#
# Args:
#   data         data.frame or tibble with one grouping column and one numeric
#                response column (e.g. the result of dplyr::pick()).
#   group_var    column name (string) of the grouping factor.
#   response_var column name (string) of the numeric response.
#   n_iter       number of bootstrap resamples (default 1000).
#
# Returns a list: observed_ratio (max/min of the group variances), critical_95
# (95th percentile of the bootstrap F-max distribution, named as quantile()
# names it), n_valid_iterations (resamples that yielded a finite ratio).
#
# Changes vs. the original:
# * Columns are extracted with [[ ]], which always yields a numeric vector.
#   The original used data[rows, response_var], which on a tibble keeps a
#   one-column tibble, so sample()/var() depended on tibble subsetting quirks.
# * dplyr/purrr helpers replaced with base R (split/vapply/lapply), removing
#   all package dependencies from this function; results are unchanged.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  response_col <- data[[response_var]]
  group_col <- data[[group_var]]
  # Per-group observations (drop = TRUE skips empty factor levels).
  group_values <- split(response_col, group_col, drop = TRUE)
  # Observed per-group variances.
  observed_vars <- vapply(group_values, var, numeric(1), na.rm = TRUE)
  # Clamp zero/NA variances so the observed ratio stays finite.
  bad <- is.na(observed_vars) | observed_vars <= 0
  observed_vars[bad] <- 1e-10
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Cache each group's non-missing values once before resampling.
  group_data_list <- lapply(group_values, function(vals) vals[!is.na(vals)])
  # Pre-allocated storage for the bootstrap F-max distribution.
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    # Resample each group independently (same n, with replacement).
    sample_vars <- vapply(group_data_list, function(vals) {
      var(sample(vals, size = length(vals), replace = TRUE))
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Drop Inf/NaN ratios produced by degenerate resamples.
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # 95th percentile of the bootstrap distribution is the critical value.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  )
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
# One bootstrap per TIME × ITEM cell; each result list is carried in a
# list-column and then unpacked into numeric columns.
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Heterogeneity flagged when the observed ratio exceeds the bootstrap cutoff.
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check for missing data patterns
# NOTE(review): the next two table()/xtabs() results are neither assigned nor
# printed; when this file is source()d they produce no visible output.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
# Check data balance
xtabs(~ pID + TIME + ITEM, data = long_data_clean)
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
print(summary(as.vector(design_balance)))
# Check for any participants with missing combinations
# Expect 10 rows per participant: 2 TIME × 5 ITEM cells.
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )
print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
# Error(pID/(TIME * ITEM)) declares the within-subject error strata.
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
                         data = long_data_clean)
print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))
# Alternative: Using afex::aov_ez for cleaner output (optional)
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
                           dv = "MEAN_DIFFERENCE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "ITEM"))
print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Sphericity tests using ezANOVA (library already loaded)
print("\n=== SPHERICITY TESTS ===")
# Test sphericity for ITEM (5 levels - within-subjects)
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))
  # Run ezANOVA to get sphericity tests
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)
  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)
}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})
# Test sphericity for TIME (2 levels - within-subjects)
# NOTE(review): with only 2 levels sphericity is trivially satisfied, so
# Mauchly's test is presumably not produced for TIME — confirm.
# NOTE(review): temp_data is created inside the previous tryCatch; the blocks
# below fail if that block errored before defining it.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)
  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})
# Test sphericity for TIME × ITEM interaction
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)
  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
# Get corrected results from ezANOVA
# NOTE(review): this call omits the between-subjects factor TEMPORAL_DO, so it
# fits a within-subjects-only ANOVA — confirm that is intended.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)
print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)
# Show epsilon values for sphericity corrections
# NOTE(review): this actually prints the Mauchly table; the epsilons live in
# ez_corrected$`Sphericity Corrections`, handled below.
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)
# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
# Check if sphericity corrections are available
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
  # Get the sphericity corrections
  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  # Extract original degrees of freedom from ANOVA table
  anova_table <- ez_corrected$ANOVA
  # Calculate corrected degrees of freedom
  # Corrected df = original df scaled by that effect's GG/HF epsilon; match()
  # aligns each corrected effect with its row of the ANOVA table.
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Also show the corrected F-values and p-values with degrees of freedom
  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for(i in 1:nrow(corrected_df)) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    # NOTE(review): %d errors on non-integral doubles in R's sprintf; fine
    # here only while the original dfs are whole numbers.
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================
print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
# Create a wide-format data for car package (library already loaded)
tryCatch({
  # Convert to wide format for car package
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")
  # Create the repeated measures design
  # NOTE(review): BUG — these names come from a DOMAIN-level analysis
  # (Preferences/Personality/Values/Life). pivot_wider() above produces
  # "Past_read" ... "Future_travel" (10 columns), so every name below is
  # reported missing and the epsilon branch never executes. TODO fix.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
  # Check if all columns exist
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Create the repeated measures design
    rm_design <- as.matrix(wide_data[, within_vars])
    # Calculate epsilon values
    # NOTE(review): car does not appear to export an epsilon() function —
    # GG/HF epsilons normally come from summary(car::Anova(mlm), multivariate
    # = FALSE). If this branch ever runs it will likely hit the error handler
    # below. TODO verify/replace.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
    # Interpretation
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }
    # =============================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =============================================================================
    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
    # ITEM main effect (DFn = 4, DFd = 4244)
    # NOTE(review): hard-coded dfs (4, 4244) must match the aov table printed
    # earlier; update by hand if the sample changes.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf
    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf
    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }
}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
# Effect size calculations (library already loaded)
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
  # Extract effect sizes from aov model
  # NOTE(review): round() on a data frame errors if it holds non-numeric
  # columns (effectsize tables include a Parameter column); the tryCatch
  # masks that. TODO round numeric columns only.
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  print(round(aov_effects, 5))
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})
# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA provides partial eta squared, convert to generalized
  ez_effects <- ez_corrected$ANOVA
  # NOTE(review): the next line is a no-op self-assignment; ezANOVA's "ges"
  # column is already generalized eta squared.
  ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
  print("Generalized Eta Squared from ezANOVA:")
  print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})
# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
# NOTE(review): anova_summary is computed but never used after this point.
anova_summary <- summary(mixed_anova_model)
# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")
# Main effect of TIME
# Marginal means for TIME, with Bonferroni-adjusted pairwise contrasts.
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of ITEM
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# TIME × ITEM Interaction
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)
# Simple effects: pairwise contrasts within each level of the `by` factor;
# Bonferroni adjustment is applied within each `by` group.
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d calculations (library already loaded)
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
#
# For every row of `pairs_df` with p < .05, parses the "A - B" contrast label,
# pulls the matching raw observations from `data`, and prints the
# effsize::cohen.d estimate plus its magnitude label.
#
# Args:
#   pairs_df: data.frame from as.data.frame(pairs(emmeans_obj, ...)); needs
#     columns `contrast` and `p.value`, optionally a `by`-factor column whose
#     name is group2_var.
#   data: long-format data frame holding the raw observations.
#   group1_var: column whose levels appear in the contrast labels.
#   group2_var: the conditioning (`by`) column name, if any.
#   response_var: numeric response column name.
#
# Side effects only (cat/print); no return value of interest.
# NOTE(review): splitting on " - " breaks silently if a factor level itself
# contains that substring — the row is skipped without warning.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)
    cat("\nCohen's d calculated from raw data:\n")
    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)
      # Parse the contrast
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])
        # Get raw data for both conditions
        # When pairs were computed with a `by` factor, restrict both samples
        # to that factor's level for this row.
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]
          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)
          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): header says DOMAIN and cites p = 0.012, but the code analyzes
# the TIME × ITEM contrasts built above — header looks copy-pasted from the
# domain-level script; the cited p-value cannot be verified here.
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Get simple effects of TIME within each ITEM
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
# Get simple effects of ITEM within each TIME
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")

View File

@ -0,0 +1,765 @@
# Mixed ANOVA Analysis for Preference Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
#            NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): absolute user path hurts portability — prefer a
# project-relative path (e.g. here::here()).
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Report dataset dimensions; collapse with " x " (trailing space) so the
# output reads "rows x cols" — the original " x" printed e.g. "425 x60".
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$pID))))
# Verify the specific variables we need
required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
                   "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required preference item variables found!")
}
# Define item mapping
# Maps each wide-format column name to its TIME (Past/Future) and ITEM level.
item_mapping <- data.frame(
  variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
               "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"),
  time = c(rep("Past", 5), rep("Future", 5)),
  item = rep(c("read", "music", "tv", "nap", "travel"), 2),
  stringsAsFactors = FALSE
)
# Item mapping created
# Efficient data pivoting using pivot_longer
# Wide -> long: one row per participant × TIME × ITEM, response held in
# MEAN_DIFFERENCE; rows with a missing response are dropped at the end.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(item_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
# Report long-format dimensions; " x " (with trailing space) renders "N x M"
# correctly — the original " x" printed e.g. "4250 x6".
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
print(paste("Number of participants:", length(unique(long_data$pID))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and ITEM
# Cell-level summary (n, central tendency, spread, quartiles, range), rounded
# to 5 decimals for display.
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)
# Descriptive statistics by between-subjects factors
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
# NOTE(review): redundant — the pivot pipeline above already filtered NAs, so
# long_data_clean equals long_data; kept as a defensive guard.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
# Report post-filter dimensions; " x " (with trailing space) renders "N x M"
# correctly — the original " x" printed e.g. "4250 x6".
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
# 1. Missing values check
# Per-cell (TIME x ITEM) count and percentage of missing responses.
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )
print("Missing values by TIME and ITEM:")
print(missing_summary)
# 2. Outlier detection
# Tukey fences: observations beyond Q1 - 1.5*IQR or Q3 + 1.5*IQR are counted
# as outliers (counted only, not removed).
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (streamlined)
# NOTE(review): ad.test() runs twice per cell (statistic, then p-value);
# nortest::ad.test needs n >= 8 per cell — TODO confirm all cells qualify.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each ITEM
# NOTE(review): leveneTest(formula) inside summarise() relies on data-masked
# columns being visible to the formula, and runs twice per group.
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)
# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Function to calculate Hartley's F-max ratio
# Hartley's F-max statistic: the largest group variance divided by the
# smallest, ignoring missing variances.
calculate_hartley_ratio <- function(variances) {
  spread <- range(variances, na.rm = TRUE)
  spread[2] / spread[1]
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × ITEM)
# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))
# NOTE(review): print() shows the characters "\n" literally; use cat() if an
# actual blank line before the banner is intended (pattern recurs below).
print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
# Hard-codes the two TEMPORAL_DO level labels "01PAST"/"02FUT", which the
# value check printed just above is meant to verify.
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Bootstrap critical value for Hartley's F-max homogeneity-of-variance test.
#
# Args:
#   data         data.frame or tibble with one grouping column and one numeric
#                response column (e.g. the result of dplyr::pick()).
#   group_var    column name (string) of the grouping factor.
#   response_var column name (string) of the numeric response.
#   n_iter       number of bootstrap resamples (default 1000).
#
# Returns a list: observed_ratio (max/min of the group variances), critical_95
# (95th percentile of the bootstrap F-max distribution, named as quantile()
# names it), n_valid_iterations (resamples that yielded a finite ratio).
#
# Changes vs. the original:
# * Columns are extracted with [[ ]], which always yields a numeric vector.
#   The original used data[rows, response_var], which on a tibble keeps a
#   one-column tibble, so sample()/var() depended on tibble subsetting quirks.
# * dplyr/purrr helpers replaced with base R (split/vapply/lapply), removing
#   all package dependencies from this function; results are unchanged.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  response_col <- data[[response_var]]
  group_col <- data[[group_var]]
  # Per-group observations (drop = TRUE skips empty factor levels).
  group_values <- split(response_col, group_col, drop = TRUE)
  # Observed per-group variances.
  observed_vars <- vapply(group_values, var, numeric(1), na.rm = TRUE)
  # Clamp zero/NA variances so the observed ratio stays finite.
  bad <- is.na(observed_vars) | observed_vars <= 0
  observed_vars[bad] <- 1e-10
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Cache each group's non-missing values once before resampling.
  group_data_list <- lapply(group_values, function(vals) vals[!is.na(vals)])
  # Pre-allocated storage for the bootstrap F-max distribution.
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    # Resample each group independently (same n, with replacement).
    sample_vars <- vapply(group_data_list, function(vals) {
      var(sample(vals, size = length(vals), replace = TRUE))
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Drop Inf/NaN ratios produced by degenerate resamples.
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # 95th percentile of the bootstrap distribution is the critical value.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  )
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
# One bootstrap per TIME × ITEM cell; each result list is carried in a
# list-column and then unpacked into numeric columns.
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Heterogeneity flagged when the observed ratio exceeds the bootstrap cutoff.
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check for missing data patterns
# NOTE(review): the next two table()/xtabs() results are neither assigned nor
# printed; when this file is source()d they produce no visible output.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
# Check data balance
xtabs(~ pID + TIME + ITEM, data = long_data_clean)
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
print(summary(as.vector(design_balance)))
# Check for any participants with missing combinations
# Expect 10 rows per participant: 2 TIME × 5 ITEM cells.
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )
print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
# Error(pID/(TIME * ITEM)) declares the within-subject error strata.
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
                         data = long_data_clean)
print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))
# Alternative: Using afex::aov_ez for cleaner output (optional)
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
                           dv = "MEAN_DIFFERENCE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "ITEM"))
print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Sphericity tests using ezANOVA (library already loaded)
print("\n=== SPHERICITY TESTS ===")
# Test sphericity for ITEM (5 levels - within-subjects)
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))
  # Run ezANOVA to get sphericity tests
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)
  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)
}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})
# Test sphericity for TIME (2 levels - within-subjects)
# NOTE(review): with only 2 levels sphericity is trivially satisfied, so
# Mauchly's test is presumably not produced for TIME — confirm.
# NOTE(review): temp_data is created inside the previous tryCatch; the blocks
# below fail if that block errored before defining it.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)
  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})
# Test sphericity for TIME × ITEM interaction
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)
  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
# Get corrected results from ezANOVA.
# NOTE(review): this call also omits between = TEMPORAL_DO, so the table below
# covers the within-subjects effects only — verify this matches the reporting.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)
print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)
# Show epsilon values for sphericity corrections
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)
# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
# Check if sphericity corrections are available
# (ezANOVA only includes this element when a within factor has > 2 levels)
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
  # Get the sphericity corrections
  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  # Extract original degrees of freedom from ANOVA table
  anova_table <- ez_corrected$ANOVA
  # Calculate corrected degrees of freedom: corrected df = original df × epsilon.
  # match() aligns each corrected effect with its row in the ANOVA table.
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Also show the corrected F-values and p-values with degrees of freedom.
  # The F statistic itself is unchanged by the corrections; only the df used
  # for the p-value change (p[GG] / p[HF] come from ezANOVA).
  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for(i in 1:nrow(corrected_df)) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================
print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
# Create a wide-format data for car package (library already loaded)
tryCatch({
  # Convert to wide format: one column per TIME_ITEM cell, one row per subject
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")
  # Create the repeated measures design
  # NOTE(review): only 8 columns are listed here (4 domains × 2 times) while
  # the design above has 5 ITEM levels (10 cells), and these labels
  # (Preferences/Personality/Values/Life) do not match the ITEM factor labels —
  # this looks copied from a different analysis; verify before trusting results.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
  # Check if all columns exist (guards against the mismatch noted above)
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Create the repeated measures design
    rm_design <- as.matrix(wide_data[, within_vars])
    # Calculate epsilon values
    # NOTE(review): `epsilon(..., type = ...)` is not an exported function of
    # current car releases — confirm which package/helper provides it.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
    # Interpretation (conventional 0.75 rule of thumb)
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }
    # =============================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =============================================================================
    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
    # ITEM main effect (DFn = 4, DFd = 4244)
    # NOTE(review): 4 and 4244 are hard-coded for one particular dataset size —
    # they will be wrong if the sample changes; verify against the ANOVA table.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf
    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf
    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }
}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
# Effect size calculations (library already loaded)
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
  # Extract effect sizes from aov model (effectsize::eta_squared)
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  print(round(aov_effects, 5))
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})
# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA's ANOVA table already carries generalized eta squared in `ges`
  ez_effects <- ez_corrected$ANOVA
  # NOTE(review): the next line is a no-op self-assignment kept from a draft
  ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
  print("Generalized Eta Squared from ezANOVA:")
  print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})
# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
anova_summary <- summary(mixed_anova_model)
# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans (marginal means + Bonferroni-adjusted pairs)
print("\n=== POST-HOC COMPARISONS ===")
# Main effect of TIME
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of ITEM
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# TIME × ITEM Interaction: cell means plus simple-effects contrasts in both
# directions (ITEM within TIME, and TIME within ITEM).
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d calculations (library already loaded)
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
# Print Cohen's d (computed from the raw data) for every significant pairwise
# contrast in an emmeans `pairs()` result.
#
# Args:
#   pairs_df:     data.frame of pairwise contrasts; must have `contrast` and
#                 `p.value` columns, and may carry a conditioning column named
#                 after `group2_var` when contrasts were computed with `by=`.
#   data:         long-format raw data to pull each group's observations from.
#   group1_var:   name of the factor whose levels appear in `contrast`.
#   group2_var:   name of the conditioning ("by") factor, if any.
#   response_var: name of the numeric response column in `data`.
#
# Side effects: prints to the console only; no meaningful return value.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  # Keep only contrasts significant at the (already adjusted) .05 level
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)
    cat("\nCohen's d calculated from raw data:\n")
    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)
      # Parse the contrast: emmeans labels look like "levelA - levelB".
      # NOTE(review): assumes no factor level itself contains " - " — verify.
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])
        # Get raw data for both conditions; restrict to the conditioning
        # level when the contrasts were computed within a `by` factor.
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]
          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package (pooled-SD estimate)
          cohens_d_result <- cohen.d(data1, data2)
          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): the p = 0.012 in the banner is a hard-coded result from a
# previous run — confirm it still matches the current ANOVA output.
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Get simple effects of TIME within each ITEM (contrasts were `by = "ITEM"`)
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
# Get simple effects of ITEM within each TIME (contrasts were `by = "TIME"`)
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")

View File

@ -0,0 +1,765 @@
# Mixed ANOVA Analysis for Values Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Variables: NPastDiff_val_obey, NPastDiff_val_trad, NPastDiff_val_opinion, NPastDiff_val_performance, NPastDiff_val_justice
# NFutDiff_val_obey, NFutDiff_val_trad, NFutDiff_val_opinion, NFutDiff_val_performance, NFutDiff_val_justice
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): machine-specific absolute path; breaks on any other machine —
# consider here::here() or running from the project root instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Display basic information about the dataset
# NOTE: collapse = " x" has no trailing space, so output reads e.g. "100 x12"
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))
# Verify the specific variables we need (fail loudly if columns were renamed)
required_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
                   "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required values item variables found!")
}
# Define item mapping: wide column name -> (time, item) pair used when pivoting
item_mapping <- data.frame(
  variable = c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
               "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice"),
  time = c(rep("Past", 5), rep("Future", 5)),
  item = rep(c("obey", "trad", "opinion", "performance", "justice"), 2),
  stringsAsFactors = FALSE
)
# Item mapping created
# Efficient data pivoting using pivot_longer: one row per pID × TIME × ITEM
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(item_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("obey", "trad", "opinion", "performance", "justice")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and ITEM (per-cell summary table)
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)
# Descriptive statistics by between-subjects factors (adds TEMPORAL_DO split)
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
# NOTE: long_data was already filtered with !is.na(MEAN_DIFFERENCE) above, so
# this is a defensive re-filter; it should not drop additional rows.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
# 1. Missing values check (per TIME × ITEM cell, counts and percentage)
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )
print("Missing values by TIME and ITEM:")
print(missing_summary)
# 2. Outlier detection using Tukey's 1.5 × IQR fences per cell
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (streamlined)
# NOTE: ad.test() is evaluated twice per cell (statistic and p.value);
# harmless but redundant.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each ITEM
# NOTE(review): leveneTest() is called with a formula but no `data` argument
# from inside summarise(); this relies on the formula picking up the data-mask
# variables — verify it evaluates against the grouped data as intended.
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)
# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Function to calculate Hartley's F-max ratio
# Hartley's F-max statistic: ratio of the largest to the smallest variance.
# NAs in `variances` are ignored; a value near 1 suggests homogeneity.
calculate_hartley_ratio <- function(variances) {
  bounds <- range(variances, na.rm = TRUE)
  bounds[2] / bounds[1]
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × ITEM)
# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))
print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
# NOTE(review): the two TEMPORAL_DO level strings "01PAST"/"02FUT" are
# hard-coded here — the printed table above exists to confirm they match.
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio (always >= 1 by construction)
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# More efficient bootstrap function for Hartley's F-max test
# Bootstrap critical value for Hartley's F-max homogeneity-of-variance test.
#
# Args:
#   data:         data.frame (or tibble, e.g. from dplyr::pick()) holding the
#                 grouping and response columns.
#   group_var:    name of the grouping column (character scalar).
#   response_var: name of the numeric response column (character scalar).
#   n_iter:       number of bootstrap iterations (default 1000).
#
# Returns a list with:
#   observed_ratio     - observed max/min variance ratio across groups
#   critical_95        - bootstrap 95th-percentile critical value for the ratio
#   n_valid_iterations - number of finite, non-NA bootstrap ratios retained
#
# Raises: stop()s if no valid bootstrap ratio could be generated.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique groups; order is irrelevant since only max/min of variances are used
  groups <- unique(data[[group_var]])
  # Observed per-group variances (base-R tapply; no dplyr needed inside here)
  observed_vars <- as.numeric(tapply(data[[response_var]], data[[group_var]],
                                     var, na.rm = TRUE))
  # Guard against zero/NA variances so the observed ratio stays finite
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }
  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)
  # Extract each group's responses ONCE as a plain numeric vector.
  # BUGFIX: the previous `data[data[[group_var]] == g, response_var]` returned
  # a one-column tibble (not a vector) when `data` came from dplyr::pick(),
  # so sample()/var() operated on the wrong type. Indexing the column first
  # always yields a vector for both data.frames and tibbles.
  group_data_list <- lapply(groups, function(g) {
    vals <- data[[response_var]][data[[group_var]] == g]
    vals[!is.na(vals)]
  })
  # Bootstrap: resample each group independently (with replacement) and
  # recompute the max/min variance ratio
  for(i in seq_len(n_iter)) {
    sample_vars <- vapply(group_data_list, function(vals) {
      var(sample(vals, size = length(vals), replace = TRUE), na.rm = TRUE)
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Remove invalid ratios (e.g. a resample with zero variance gives Inf)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value = 95th percentile of the bootstrap ratio distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination.
# pick() hands each group's TEMPORAL_DO/MEAN_DIFFERENCE columns to the
# bootstrap helper (requires dplyr >= 1.1.0).
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    # Unpack the list-column into plain numeric columns
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Flag cells where the observed ratio exceeds the bootstrap critical value
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check for missing data patterns
# NOTE(review): these two table()/xtabs() results are not print()ed or
# assigned, so under source() they produce no visible output — confirm whether
# they were meant to be printed.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
# Check data balance
xtabs(~ pID + TIME + ITEM, data = long_data_clean)
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
# Check if design is balanced (all cell counts 1 => fully balanced)
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
print(summary(as.vector(design_balance)))
# Check for any participants with missing combinations
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )
print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels)
# NOTE(review): an earlier comment listed ITEM levels as read/music/tv/nap/travel,
# but this file's items are obey/trad/opinion/performance/justice — stale comment.
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
                         data = long_data_clean)
print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))
# Alternative: Using afex::aov_ez for cleaner output (optional)
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
                           dv = "MEAN_DIFFERENCE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "ITEM"))
print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Sphericity tests using ezANOVA (library already loaded).
# NOTE(review): these ezANOVA calls omit between = TEMPORAL_DO, so Mauchly's
# test is computed on a within-subjects-only model — confirm this is intended.
print("\n=== SPHERICITY TESTS ===")
# Test sphericity for ITEM (5 levels - within-subjects)
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA (numeric subject id column).
  # NOTE: temp_data is assigned in the calling environment and reused by the
  # later tryCatch blocks; if this block errors first, they fail too.
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))
  # Run ezANOVA to get sphericity tests
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)
  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)
}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})
# Test sphericity for TIME (2 levels - within-subjects)
# NOTE: with 2 levels sphericity is trivially satisfied; ezANOVA usually
# returns no Mauchly table for this factor.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)
  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})
# Test sphericity for TIME × ITEM interaction
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)
  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
# Get corrected results from ezANOVA.
# NOTE(review): between = TEMPORAL_DO is not passed here either, so only
# within-subjects effects appear in the corrected table — verify intent.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)
print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)
# Show epsilon values for sphericity corrections
# NOTE: this prints the Mauchly table; the epsilon values themselves live in
# ez_corrected$`Sphericity Corrections` (handled below).
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)
# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
print(ez_corrected$`Sphericity Corrections`)
# Extract and display corrected degrees of freedom
cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
# Get the sphericity corrections
sphericity_corr <- ez_corrected$`Sphericity Corrections`
# Extract original degrees of freedom from ANOVA table
anova_table <- ez_corrected$ANOVA
# Calculate corrected degrees of freedom
corrected_df <- data.frame(
Effect = sphericity_corr$Effect,
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
GG_epsilon = sphericity_corr$GGe,
HF_epsilon = sphericity_corr$HFe
)
print(corrected_df)
# Also show the corrected F-values and p-values with degrees of freedom
cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
for(i in 1:nrow(corrected_df)) {
effect <- corrected_df$Effect[i]
f_value <- anova_table$F[match(effect, anova_table$Effect)]
cat(sprintf("\n%s:\n", effect))
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
}
} else {
print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================
print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
# Reshape to one row per subject (wide format) so epsilons can be computed
# directly from the repeated-measures matrix.
tryCatch({
  # Convert to wide format: one column per TIME_ITEM cell
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")
  # Expected TIME_ITEM column names.
  # NOTE(review): only 4 ITEM names appear here although ITEM was described
  # above as having 5 levels -- confirm the intended set of columns.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
  # Check if all columns exist before indexing
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Create the repeated measures design matrix (subjects × conditions)
    rm_design <- as.matrix(wide_data[, within_vars])
    # Calculate epsilon values
    # NOTE(review): epsilon() does not appear to be a car export -- verify
    # which loaded package actually provides this function.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
    # Interpretation: conventional 0.75 cutoff for choosing GG vs HF
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }
    # =============================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =============================================================================
    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
    # ITEM main effect.
    # NOTE(review): dfs (4, 4244) are hard-coded here; confirm they match
    # the fitted model's actual degrees of freedom.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf
    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # TIME × ITEM interaction (same hard-coded DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf
    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }
}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
# Effect size calculations using eta_squared() (loaded earlier in the
# script) on the aov model, plus the "ges" column ezANOVA already reports.
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Generalized eta squared from the aov() model fitted earlier
# (mixed_anova_model); wrapped in tryCatch so a failure does not stop the run.
print("Effect Sizes from aov() model:")
tryCatch({
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  print(round(aov_effects, 5))
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})
# Generalized eta squared reported by ezANOVA: the "ges" column of the
# ANOVA table computed above.
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  ez_effects <- ez_corrected$ANOVA
  # ezANOVA already provides generalized eta squared in the "ges" column,
  # so no conversion is needed (a redundant self-assignment was removed here).
  print("Generalized Eta Squared from ezANOVA:")
  print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})
# Keep the aov() summary around for later reference/reporting.
anova_summary <- summary(mixed_anova_model)
# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Bonferroni-adjusted pairwise comparisons on the aov model fitted earlier
# (mixed_anova_model), via emmeans.
print("\n=== POST-HOC COMPARISONS ===")
# Main effect of TIME: estimated marginal means and all pairwise contrasts
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of ITEM: same procedure across the ITEM levels
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# TIME × ITEM Interaction: cell means, then simple effects in each direction
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)
# Simple effects of ITEM within each TIME level (contrasts grouped by TIME)
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)
# Simple effects of TIME within each ITEM level (contrasts grouped by ITEM)
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d for significant simple effects, computed from the raw data with
# effsize::cohen.d via the helper function defined next.
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
# Report Cohen's d for every significant contrast in an emmeans pairs table.
#
# pairs_df:     data.frame from as.data.frame(pairs(...)); must have columns
#               "contrast" (e.g. "A - B") and "p.value".
# data:         long-format raw data used to look up the observations.
# group1_var:   column of `data` whose levels appear in the contrast string.
# group2_var:   conditioning column; when it is also a column of `pairs_df`,
#               observations are restricted to that row's level.
# response_var: name of the numeric outcome column in `data`.
#
# Side effects only: prints the significant rows and, for each, sample sizes,
# Cohen's d (effsize::cohen.d), its magnitude label, and the p-value.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  sig_rows <- pairs_df[pairs_df$p.value < 0.05, ]
  # Nothing significant: report and bail out early.
  if(nrow(sig_rows) == 0) {
    cat("No significant pairwise comparisons found.\n")
    return(invisible(NULL))
  }
  cat("Significant pairwise comparisons (p < 0.05):\n")
  print(sig_rows)
  cat("\nCohen's d calculated from raw data:\n")
  # Whether the conditioning variable travels with each contrast row
  # (loop-invariant, so computed once).
  by_group2 <- group2_var %in% colnames(sig_rows)
  for(idx in seq_len(nrow(sig_rows))) {
    row_i <- sig_rows[idx, ]
    label <- as.character(row_i$contrast)
    # Split "level1 - level2" into its two sides; skip malformed contrasts.
    sides <- strsplit(label, " - ")[[1]]
    if(length(sides) != 2) next
    lhs <- trimws(sides[1])
    rhs <- trimws(sides[2])
    # Build logical masks selecting the raw observations for each side,
    # optionally restricted to this row's group2 level.
    if(by_group2) {
      g2_level <- as.character(row_i[[group2_var]])
      keep_a <- data[[group1_var]] == lhs & data[[group2_var]] == g2_level
      keep_b <- data[[group1_var]] == rhs & data[[group2_var]] == g2_level
    } else {
      keep_a <- data[[group1_var]] == lhs
      keep_b <- data[[group1_var]] == rhs
    }
    vals_a <- data[[response_var]][keep_a]
    vals_b <- data[[response_var]][keep_b]
    if(length(vals_a) == 0 || length(vals_b) == 0) next
    # Cohen's d via the effsize package
    d_res <- cohen.d(vals_a, vals_b)
    cat(sprintf("Comparison: %s", label))
    if(by_group2) {
      cat(sprintf(" | %s", g2_level))
    }
    cat(sprintf("\n n1 = %d, n2 = %d\n", length(vals_a), length(vals_b)))
    cat(sprintf(" Cohen's d: %.5f\n", d_res$estimate))
    cat(sprintf(" Effect size interpretation: %s\n", d_res$magnitude))
    cat(sprintf(" p-value: %.5f\n", row_i$p.value))
    cat("\n")
  }
}
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): the header says "DOMAIN" but the factor passed below is ITEM
# -- confirm which label is intended.
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Simple effects of TIME within each ITEM (pairs computed above)
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
# Simple effects of ITEM within each TIME (pairs computed above)
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")

BIN
eohi1/normality_plots.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,10 @@
"","NPast_mean_total","NFut_mean_total","DGEN_past_mean","DGEN_fut_mean","domain_mean","DGEN_mean","AOT_total","CRT_correct","CRT_int"
"NPast_mean_total",1,0.58407,0.37645,0.19034,0.91767,0.33077,0.04531,-0.00144,-0.01053
"NFut_mean_total",0.58407,1,0.31658,0.32376,0.85851,0.37059,-0.04928,-0.074,0.05859
"DGEN_past_mean",0.37645,0.31658,1,0.77074,0.3928,0.92613,-0.20675,-0.04166,0.0038
"DGEN_fut_mean",0.19034,0.32376,0.77074,1,0.27874,0.92466,-0.28285,-0.0818,0.03967
"domain_mean",0.91767,0.85851,0.3928,0.27874,1,0.39038,0.0045,-0.03713,0.02203
"DGEN_mean",0.33077,0.37059,0.92613,0.92466,0.39038,1,-0.23213,-0.05422,0.01483
"AOT_total",0.04531,-0.04928,-0.20675,-0.28285,0.0045,-0.23213,1,0.24279,-0.20318
"CRT_correct",-0.00144,-0.074,-0.04166,-0.0818,-0.03713,-0.05422,0.24279,1,-0.89661
"CRT_int",-0.01053,0.05859,0.0038,0.03967,0.02203,0.01483,-0.20318,-0.89661,1
1 NPast_mean_total NFut_mean_total DGEN_past_mean DGEN_fut_mean domain_mean DGEN_mean AOT_total CRT_correct CRT_int
1 NPast_mean_total NFut_mean_total DGEN_past_mean DGEN_fut_mean domain_mean DGEN_mean AOT_total CRT_correct CRT_int
2 NPast_mean_total 1 0.58407 0.37645 0.19034 0.91767 0.33077 0.04531 -0.00144 -0.01053
3 NFut_mean_total 0.58407 1 0.31658 0.32376 0.85851 0.37059 -0.04928 -0.074 0.05859
4 DGEN_past_mean 0.37645 0.31658 1 0.77074 0.3928 0.92613 -0.20675 -0.04166 0.0038
5 DGEN_fut_mean 0.19034 0.32376 0.77074 1 0.27874 0.92466 -0.28285 -0.0818 0.03967
6 domain_mean 0.91767 0.85851 0.3928 0.27874 1 0.39038 0.0045 -0.03713 0.02203
7 DGEN_mean 0.33077 0.37059 0.92613 0.92466 0.39038 1 -0.23213 -0.05422 0.01483
8 AOT_total 0.04531 -0.04928 -0.20675 -0.28285 0.0045 -0.23213 1 0.24279 -0.20318
9 CRT_correct -0.00144 -0.074 -0.04166 -0.0818 -0.03713 -0.05422 0.24279 1 -0.89661
10 CRT_int -0.01053 0.05859 0.0038 0.03967 0.02203 0.01483 -0.20318 -0.89661 1

View File

@ -0,0 +1,232 @@
# --- Setup: education-level regression script (EOHI-DGEN / EHI domain) ---
options(scipen = 999)  # suppress scientific notation in printed output
library(dplyr)
library(car)
library(lmtest)        # (duplicate library(lmtest) call removed)
library(stargazer)
library(sandwich)
# NOTE: working directory and input file are machine-specific
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep only the two outcome means and the education predictor
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>%
  mutate(demo_edu = as.factor(demo_edu))
# examine data object
str(data)
colSums(is.na(data))
sapply(data, class)
levels(data$demo_edu)
# Re-order education levels explicitly so that "High School (or equivalent)"
# is first and therefore serves as the reference category below
data$demo_edu <- factor(data$demo_edu, levels = c(
  "High School (or equivalent)",
  "Trade School (non-military)",
  "College Diploma/Certificate",
  "University - Undergraduate",
  "University - Graduate (Masters)",
  "University - PhD",
  "Professional Degree (ex. JD/MD)"
))
levels(data$demo_edu)
# Create dummy variables: one indicator column per education level
# (~ demo_edu - 1 keeps all 7 levels; the reference is dropped manually below)
dummy_vars <- model.matrix(~ demo_edu - 1, data = data)
dummy_df <- as.data.frame(dummy_vars)
# Rename columns with meaningful names; column order follows the factor
# levels set above (High School first), so edu_highschool is the reference
colnames(dummy_df) <- c(
  "edu_highschool", # reference level (will be dropped)
  "edu_trade",
  "edu_college",
  "edu_uni_undergrad",
  "edu_uni_masters",
  "edu_uni_phd",
  "edu_prof"
)
# Add the dummies to the analysis data, then drop the reference column
data <- cbind(data, dummy_df)
data <- data %>% select(-starts_with("edu_highschool"))
#### MODEL 1 - DGEN ####
# OLS regression of EOHI-DGEN on the six education dummies
# (reference category: High School or equivalent)
model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad +
                   edu_uni_masters + edu_uni_phd + edu_prof, data = data)
# Model 1 diagnostics (2x2 grid of base-R diagnostic plots)
par(mfrow = c(2, 2))
plot(model_DGEN, which = 1) # Residuals vs Fitted
plot(model_DGEN, which = 2) # Normal Q-Q, normality
hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals")
shapiro.test(residuals(model_DGEN))
plot(model_DGEN, which = 3) # Scale-Location
plot(model_DGEN, which = 4) # Cook's Distance
# Model 1 specific tests
vif(model_DGEN) # Multicollinearity
dwtest(model_DGEN) # Independence
outlierTest(model_DGEN) # Outliers
# Look at the specific influential cases
# NOTE(review): only 3 of the 6 outlier rows listed in the comment below are
# inspected here -- confirm whether the remaining rows were checked elsewhere.
data[c(670, 388, 760), ]
# 6 outliers: 670, 388, 760, 258, 873, 1030; acknowledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable.
# heteroscedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity.
# normality violated but sample size is robust to violation
# no multicollinearity
# no autocorrelation (samples are independent)
# results: summary, AIC, and formatted stargazer text table
print(summary(model_DGEN))
print(AIC(model_DGEN))
# Create a nice formatted table
stargazer(model_DGEN, type = "text",
          title = "Regression Results: Education and EOHI-DGEN",
          dep.var.labels = "EOHI-DGEN Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp",
          add.lines = list(c("AIC", round(AIC(model_DGEN), 2))))
# Use robust standard errors (doesn't change coefficients, just SEs):
# HC3 heteroscedasticity-consistent covariance from the sandwich package
modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3"))
stargazer(modelDGEN_robust, type = "text",
          title = "Regression Results: Education and EOHI-DGEN",
          dep.var.labels = "EOHI-DGEN Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp")
#### MODEL 2 - DOMAIN ####
# OLS regression of the EHI domain mean on the same education dummies
model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad +
                     edu_uni_masters + edu_uni_phd + edu_prof, data = data)
# ASSUMPTION CHECKING FOR MODEL 2 (model_domain)
plot(model_domain, which = 1) # Residuals vs Fitted
plot(model_domain, which = 2) # Normal Q-Q, normality
hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals")
shapiro.test(residuals(model_domain))
plot(model_domain, which = 3) # Scale-Location
plot(model_domain, which = 4) # Cook's Distance
# Model 2 specific tests
vif(model_domain) # Multicollinearity
dwtest(model_domain) # Independence
outlierTest(model_domain) # Outliers
# Check if the autocorrelation is real or artifactual:
# Plot residuals against observation order (trend would suggest ordering effects)
plot(residuals(model_domain), type = "l")
abline(h = 0, col = "red")
# 6 outliers: acknowledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable.
# heteroscedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity.
# normality violated but sample size is robust to violation
# no multicollinearity
# autocorrelation is significant, may be due to aggregated measure of multiple repeated measures
# Reset plotting to 1x1
# par(mfrow = c(1, 1))
print(summary(model_domain))
print(AIC(model_domain))
stargazer(model_domain, type = "text",
          title = "Regression Results: Education and EHI Domain",
          dep.var.labels = "EHI Domain Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp", # This shows coefficients, SEs, and p-values
          add.lines = list(c("AIC", round(AIC(model_domain), 2))))
# Use robust standard errors (doesn't change coefficients, just SEs)
modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3"))
stargazer(modelDOMAIN_robust, type = "text",
          title = "Regression Results: Education and EHI Domain",
          dep.var.labels = "EHI Domain Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp")
#### PLOTS ####
# Dot-and-error-bar plots of group means by education level, with
# normal-approximation 95% CIs (mean ± 1.96 * SE).
library(ggplot2)
library(dplyr)
# Calculate means and confidence intervals for EOHI-DGEN
edu_summary_DGEN <- data %>%
  group_by(demo_edu) %>%
  summarise(
    mean_DGEN = mean(eohiDGEN_mean, na.rm = TRUE),
    n = n(),
    se_DGEN = sd(eohiDGEN_mean, na.rm = TRUE) / sqrt(n()),
    ci_lower_DGEN = mean_DGEN - 1.96 * se_DGEN,
    ci_upper_DGEN = mean_DGEN + 1.96 * se_DGEN
  )
# Calculate means and confidence intervals for EHI Domain
edu_summary_domain <- data %>%
  group_by(demo_edu) %>%
  summarise(
    mean_domain = mean(ehi_global_mean, na.rm = TRUE),
    n = n(),
    se_domain = sd(ehi_global_mean, na.rm = TRUE) / sqrt(n()),
    ci_lower_domain = mean_domain - 1.96 * se_domain,
    ci_upper_domain = mean_domain + 1.96 * se_domain
  )
# Plot 1: EOHI-DGEN means with confidence intervals
p1 <- ggplot(edu_summary_DGEN, aes(x = demo_edu, y = mean_DGEN)) +
  geom_point(size = 3, color = "steelblue") +
  geom_errorbar(aes(ymin = ci_lower_DGEN, ymax = ci_upper_DGEN),
                width = 0.1, color = "steelblue", linewidth = 1) +
  labs(
    title = "Mean EOHI-DGEN by Education Level",
    subtitle = "Error bars show 95% confidence intervals",
    x = "Education Level",
    y = "Mean EOHI-DGEN Score"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
# Plot 2: EHI Domain means with confidence intervals (same layout)
p2 <- ggplot(edu_summary_domain, aes(x = demo_edu, y = mean_domain)) +
  geom_point(size = 3, color = "darkgreen") +
  geom_errorbar(aes(ymin = ci_lower_domain, ymax = ci_upper_domain),
                width = 0.1, color = "darkgreen", linewidth = 1) +
  labs(
    title = "Mean EHI Domain by Education Level",
    subtitle = "Error bars show 95% confidence intervals",
    x = "Education Level",
    y = "Mean EHI Domain Score"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
# Display the plots
print(p1)
print(p2)
# Save the plots (300 dpi PNGs in the working directory)
ggsave("education_DGEN_means.png", p1, width = 10, height = 6, dpi = 300)
ggsave("education_domain_means.png", p2, width = 10, height = 6, dpi = 300)

View File

@ -0,0 +1,320 @@
# --- Setup: age/sex regression script (EOHI-DGEN / EHI domain) ---
options(scipen = 999)  # suppress scientific notation in printed output
library(dplyr)
library(car)
library(lmtest)        # (duplicate library(lmtest) call removed)
library(stargazer)
library(sandwich)
# NOTE: working directory and input file are machine-specific
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the two outcomes plus sex and age; drop "Prefer not to say" so that
# the sex dummy below is strictly binary
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>%
  filter(demo_sex != "Prefer not to say")
str(data)
colSums(is.na(data))
sapply(data, class)
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean) so the intercept refers to mean age
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering (mean should be ~0)
print(summary(data$age_centered))
# Descriptives for sex (frequency table and proportions)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
#### REGRESSION MODELS ####
# For each model: fit, 2x2 base-R diagnostic plots, Shapiro-Wilk test on the
# residuals, coefficient summary, and AIC.
# MODEL 1: Age only - EOHI
age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data)
par(mfrow = c(2, 2))
plot(age_DGEN)
print(shapiro.test(residuals(age_DGEN)))
print(summary(age_DGEN))
print(AIC(age_DGEN))
# MODEL 1: Age only - EHI
age_domain <- lm(ehi_global_mean ~ age_centered, data = data)
par(mfrow = c(2, 2))
plot(age_domain)
print(shapiro.test(residuals(age_domain)))
print(summary(age_domain))
print(AIC(age_domain))
# MODEL 2: Sex only - EOHI
sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(sex_DGEN)
print(shapiro.test(residuals(sex_DGEN)))
print(summary(sex_DGEN))
print(AIC(sex_DGEN))
# Diagnostic-plot interpretation notes for the sex_DGEN model:
# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met.
# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust.
# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n.
# p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage.
# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed.
# MODEL 2: Sex only - EHI
sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(sex_domain)
print(shapiro.test(residuals(sex_domain)))
print(summary(sex_domain))
print(AIC(sex_domain))
# MODEL 3: Age + Sex + Interaction - EOHI
# (explicit age × sex product term; VIFs are stored for later reporting)
interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(interaction_DGEN)
print(shapiro.test(residuals(interaction_DGEN)))
vif_DGEN <- vif(interaction_DGEN)
print(vif_DGEN)
print(summary(interaction_DGEN))
print(AIC(interaction_DGEN))
# MODEL 3: Age + Sex + Interaction - EHI (same specification, EHI outcome)
interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(interaction_domain)
print(shapiro.test(residuals(interaction_domain)))
vif_domain <- vif(interaction_domain)
print(vif_domain)
print(summary(interaction_domain))
print(AIC(interaction_domain))
#### troubleshooting ####
# Clear any existing plots
# dev.off()
#### PLOTS ####
# Scatterplots with fitted OLS lines for the two age-only models
library(ggplot2)
# Figure 1: age_DGEN model
# NOTE(review): the subtitle hard-codes "p < 0.001"; confirm this matches
# the model output rather than deriving it from summary(age_DGEN).
p1 <- ggplot(data, aes(x = age_centered, y = eohiDGEN_mean)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Age and EOHI-DGEN Relationship",
    x = "Age (centered)",
    y = "EOHI-DGEN Mean",
    subtitle = paste("R² =", round(summary(age_DGEN)$r.squared, 3),
                     ", p < 0.001")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
# Figure 2: age_domain model (same layout, different outcome)
p2 <- ggplot(data, aes(x = age_centered, y = ehi_global_mean)) +
  geom_point(alpha = 0.6, color = "darkgreen") +
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Age and EHI Domain Relationship",
    x = "Age (centered)",
    y = "EHI Domain Mean",
    subtitle = paste("R² =", round(summary(age_domain)$r.squared, 3),
                     ", p < 0.001")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
# Save the plots (300 dpi PNGs in the working directory)
ggsave("age_DGEN_plot.png", p1, width = 8, height = 6, dpi = 300)
ggsave("age_domain_plot.png", p2, width = 8, height = 6, dpi = 300)
# Display the plots
print(p1)
print(p2)
#### HTML file ####
# Build a single HTML report grouped by model: per outcome, each model's
# stargazer table (with AIC and max VIF lines), its assumption plots, and --
# for the age models -- the fitted-line visualization.
# BUG FIX: the original referenced max_vif_DGEN / max_vif_domain, which were
# never defined anywhere in the script; the maxima are now computed directly
# from vif_DGEN / vif_domain (calculated for the interaction models above).
# NOTE(review): the *_assumptions.png images referenced below are not saved
# by this script -- confirm they are produced elsewhere.
library(htmltools)
# Start HTML document
html_content <- htmltools::div(
  htmltools::h1("Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain"),
  # EOHI-DGEN Models Section
  htmltools::h2("EOHI-DGEN Models"),
  # Age Model
  htmltools::h3("1. Age Model (age_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        age_DGEN,
        type = "html",
        title = "Age Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Age (centered)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(age_DGEN), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "age_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;"),
    htmltools::h4("Model Visualization"),
    htmltools::img(src = "age_DGEN_plot.png", style = "width: 100%; max-width: 800px;")
  ),
  # Sex Model
  htmltools::h3("2. Sex Model (sex_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        sex_DGEN,
        type = "html",
        title = "Sex Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Sex (dummy)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(sex_DGEN), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "sex_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),
  # Interaction Model
  htmltools::h3("3. Interaction Model (interaction_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        interaction_DGEN,
        type = "html",
        title = "Interaction Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(interaction_DGEN), 2)),
          # FIX: was the undefined max_vif_DGEN
          c("Max VIF", round(max(vif_DGEN), 2))
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "interaction_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),
  # EHI Domain Models Section
  htmltools::h2("EHI Domain Models"),
  # Age Model
  htmltools::h3("1. Age Model (age_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        age_domain,
        type = "html",
        title = "Age Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Age (centered)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(age_domain), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "age_domain_assumptions.png", style = "width: 100%; max-width: 800px;"),
    htmltools::h4("Model Visualization"),
    htmltools::img(src = "age_domain_plot.png", style = "width: 100%; max-width: 800px;")
  ),
  # Sex Model
  htmltools::h3("2. Sex Model (sex_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        sex_domain,
        type = "html",
        title = "Sex Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Sex (dummy)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(sex_domain), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "sex_domain_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),
  # Interaction Model
  htmltools::h3("3. Interaction Model (interaction_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        interaction_domain,
        type = "html",
        title = "Interaction Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(interaction_domain), 2)),
          # FIX: was the undefined max_vif_domain
          c("Max VIF", round(max(vif_domain), 2))
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "interaction_domain_assumptions.png", style = "width: 100%; max-width: 800px;")
  )
)
# Save HTML content to the working directory
htmltools::save_html(html_content, "regression_analysis_report.html")
print("HTML report created: regression_analysis_report.html")

View File

@ -0,0 +1,31 @@
<table style="text-align:center"><caption><strong>Regression Models: EOHI-DGEN</strong></caption>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="3">EOHI-DGEN Mean</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td><td></td><td>-0.018</td></tr>
<tr><td style="text-align:left"></td><td>(0.003)</td><td></td><td>(0.005)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td><td></td><td>p = 0.0004<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.099</td><td>0.103</td></tr>
<tr><td style="text-align:left"></td><td></td><td>(0.107)</td><td>(0.106)</td></tr>
<tr><td style="text-align:left"></td><td></td><td>p = 0.355</td><td>p = 0.331</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>0.006</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.402</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Constant</td><td>0.417</td><td>0.367</td><td>0.364</td></tr>
<tr><td style="text-align:left"></td><td>(0.053)</td><td>(0.076)</td><td>(0.075)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td><td>4192.48</td><td>4176.42</td></tr>
<tr><td style="text-align:left">Max VIF</td><td>N/A</td><td>N/A</td><td>2.22</td></tr>
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td><td>0.001</td><td>0.020</td></tr>
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td><td>-0.0001</td><td>0.017</td></tr>
<tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td><td>1.742 (df = 1059)</td><td>1.727 (df = 1057)</td></tr>
<tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td><td>0.859 (df = 1; 1059)</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
</table>

View File

@ -0,0 +1,31 @@
<table style="text-align:center"><caption><strong>Regression Models: EHI Domain</strong></caption>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="3">EHI Domain Mean</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.007</td><td></td><td>-0.006</td></tr>
<tr><td style="text-align:left"></td><td>(0.001)</td><td></td><td>(0.001)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td></td><td>p = 0.00001<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.031</td><td>0.033</td></tr>
<tr><td style="text-align:left"></td><td></td><td>(0.031)</td><td>(0.030)</td></tr>
<tr><td style="text-align:left"></td><td></td><td>p = 0.311</td><td>p = 0.274</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>-0.0003</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.002)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.883</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Constant</td><td>0.137</td><td>0.121</td><td>0.120</td></tr>
<tr><td style="text-align:left"></td><td>(0.015)</td><td>(0.022)</td><td>(0.021)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00000<sup>***</sup></td><td>p = 0.00000<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1515.75</td><td>1562.88</td><td>1518.52</td></tr>
<tr><td style="text-align:left">Max VIF</td><td>NA</td><td>NA</td><td>2.22</td></tr>
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.044</td><td>0.001</td><td>0.045</td></tr>
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td><td>0.00003</td><td>0.043</td></tr>
<tr><td style="text-align:left">Residual Std. Error</td><td>0.493 (df = 1059)</td><td>0.504 (df = 1059)</td><td>0.494 (df = 1057)</td></tr>
<tr><td style="text-align:left">F Statistic</td><td>49.178<sup>***</sup> (df = 1; 1059)</td><td>1.029 (df = 1; 1059)</td><td>16.789<sup>***</sup> (df = 3; 1057)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
</table>

View File

@ -0,0 +1,31 @@
<table style="text-align:center"><caption><strong>Regression Models: EOHI-DGEN</strong></caption>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="3">EOHI-DGEN Mean</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td><td></td><td>-0.018</td></tr>
<tr><td style="text-align:left"></td><td>(0.003)</td><td></td><td>(0.005)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td><td></td><td>p = 0.0004<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.099</td><td>0.103</td></tr>
<tr><td style="text-align:left"></td><td></td><td>(0.107)</td><td>(0.106)</td></tr>
<tr><td style="text-align:left"></td><td></td><td>p = 0.355</td><td>p = 0.331</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>0.006</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.402</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Constant</td><td>0.417</td><td>0.367</td><td>0.364</td></tr>
<tr><td style="text-align:left"></td><td>(0.053)</td><td>(0.076)</td><td>(0.075)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td><td>4192.48</td><td>4176.42</td></tr>
<tr><td style="text-align:left">Max VIF</td><td>N/A</td><td>N/A</td><td>2.22</td></tr>
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td><td>0.001</td><td>0.020</td></tr>
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td><td>-0.0001</td><td>0.017</td></tr>
<tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td><td>1.742 (df = 1059)</td><td>1.727 (df = 1057)</td></tr>
<tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td><td>0.859 (df = 1; 1059)</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
</table>

View File

@ -0,0 +1,62 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<style>body{background-color:white;}</style>
</head>
<body>
<div>
<h1>Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain</h1>
<h2>EOHI-DGEN Models</h2>
<h3>1. Age Model (age_DGEN)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Age Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td></tr> <tr><td style="text-align:left"></td><td>(0.003)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.417</td></tr> <tr><td style="text-align:left"></td><td>(0.053)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="age_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
<h4>Model Visualization</h4>
<img src="age_DGEN_plot.png" style="width: 100%; max-width: 800px;"/>
</div>
<h3>2. Sex Model (sex_DGEN)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Sex Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Sex (dummy)</td><td>0.099</td></tr> <tr><td style="text-align:left"></td><td>(0.107)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.355</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.367</td></tr> <tr><td style="text-align:left"></td><td>(0.076)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4192.48</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.001</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>-0.0001</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>1.742 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>0.859 (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="sex_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
</div>
<h3>3. Interaction Model (interaction_DGEN)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Interaction Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.018</td></tr> <tr><td style="text-align:left"></td><td>(0.005)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.0004<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Sex (dummy)</td><td>0.103</td></tr> <tr><td style="text-align:left"></td><td>(0.106)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.331</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Age x Sex</td><td>0.006</td></tr> <tr><td style="text-align:left"></td><td>(0.007)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.402</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.364</td></tr> <tr><td style="text-align:left"></td><td>(0.075)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4176.42</td></tr> <tr><td style="text-align:left">Max VIF</td><td>2.22</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.020</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td></tr> <tr><td style="text-align:left">Residual Std. 
Error</td><td>1.727 (df = 1057)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="interaction_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
</div>
<h2>EHI Domain Models</h2>
<h3>1. Age Model (age_domain)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Age Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.007</td></tr> <tr><td style="text-align:left"></td><td>(0.001)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.137</td></tr> <tr><td style="text-align:left"></td><td>(0.015)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1515.75</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.044</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>0.493 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>49.178<sup>***</sup> (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="age_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
<h4>Model Visualization</h4>
<img src="age_domain_plot.png" style="width: 100%; max-width: 800px;"/>
</div>
<h3>2. Sex Model (sex_domain)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Sex Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Sex (dummy)</td><td>0.031</td></tr> <tr><td style="text-align:left"></td><td>(0.031)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.311</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.121</td></tr> <tr><td style="text-align:left"></td><td>(0.022)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1562.88</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.001</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.00003</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>0.504 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>1.029 (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="sex_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
</div>
<h3>3. Interaction Model (interaction_domain)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Interaction Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.006</td></tr> <tr><td style="text-align:left"></td><td>(0.001)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Sex (dummy)</td><td>0.033</td></tr> <tr><td style="text-align:left"></td><td>(0.030)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.274</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Age x Sex</td><td>-0.0003</td></tr> <tr><td style="text-align:left"></td><td>(0.002)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.883</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.120</td></tr> <tr><td style="text-align:left"></td><td>(0.021)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1518.52</td></tr> <tr><td style="text-align:left">Max VIF</td><td>2.22</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.045</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td></tr> <tr><td style="text-align:left">Residual Std. 
Error</td><td>0.494 (df = 1057)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>16.789<sup>***</sup> (df = 3; 1057)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="interaction_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
</div>
</div>
</body>
</html>

View File

@ -0,0 +1,204 @@
# Cronbach's Alpha Reliability Analysis
# For 4 domains (preferences, personality, values, life satisfaction) at 2 time points
# 5 items per domain per time point

# Load required libraries
library(psych)
library(dplyr)
library(tidyr)

# Read the data
data <- read.csv("exp1.csv")

# Item names are identical at both time points; only the column-name prefix
# differs ("NPastDiff_" for past ratings, "NFutDiff_" for future ratings),
# so build each scale's variable list from a shared set of item stems.
pref_items <- c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel")
pers_items <- c("pers_extravert", "pers_critical", "pers_dependable",
                "pers_anxious", "pers_complex")
val_items  <- c("val_obey", "val_trad", "val_opinion",
                "val_performance", "val_justice")
life_items <- c("life_ideal", "life_excellent", "life_satisfied",
                "life_important", "life_change")

# Past time point scales
past_pref_vars <- paste0("NPastDiff_", pref_items)
past_pers_vars <- paste0("NPastDiff_", pers_items)
past_val_vars  <- paste0("NPastDiff_", val_items)
past_life_vars <- paste0("NPastDiff_", life_items)

# Future time point scales
fut_pref_vars <- paste0("NFutDiff_", pref_items)
fut_pers_vars <- paste0("NFutDiff_", pers_items)
fut_val_vars  <- paste0("NFutDiff_", val_items)
fut_life_vars <- paste0("NFutDiff_", life_items)
# Function to calculate Cronbach's alpha and return detailed results.
#
# Args:
#   data:       data frame containing (at least) the columns named in var_names
#   var_names:  character vector of the scale's item column names
#   scale_name: label printed in the report header
#
# Side effects: cat()/print()s a missing-data summary, the alpha coefficients,
# per-item statistics, and assumption checks to the console.
# Returns: the psych::alpha() result object, or NULL when fewer than 3
# complete cases remain.
calc_cronbach_alpha <- function(data, var_names, scale_name) {
  # BUG FIX: the original used `"="*60`, which is a runtime error in R
  # ("non-numeric argument to binary operator") -- R has no string
  # repetition operator. strrep() builds the intended separator line.
  sep60 <- strrep("=", 60)
  # Check for missing values
  scale_data <- data[, var_names]
  missing_info <- data.frame(
    Variable = var_names,
    Missing_Count = colSums(is.na(scale_data)),
    Missing_Percent = round(colSums(is.na(scale_data)) / nrow(scale_data) * 100, 2)
  )
  # Remove rows with any missing values for reliability analysis
  complete_data <- scale_data[complete.cases(scale_data), ]
  cat("\n", sep60, "\n")
  cat("SCALE:", scale_name, "\n")
  cat(sep60, "\n")
  cat("Sample size for reliability analysis:", nrow(complete_data), "\n")
  cat("Original sample size:", nrow(data), "\n")
  cat("Cases removed due to missing data:", nrow(data) - nrow(complete_data), "\n\n")
  cat("Missing data summary:\n")
  print(missing_info)
  # Reliability cannot be estimated from fewer than 3 complete cases
  if(nrow(complete_data) < 3) {
    cat("\nWARNING: Insufficient complete cases for reliability analysis\n")
    return(NULL)
  }
  # Calculate Cronbach's alpha; check.keys = TRUE lets psych auto-reverse
  # items that correlate negatively with the total (flagged in its output)
  alpha_result <- alpha(complete_data, check.keys = TRUE)
  cat("\nCronbach's Alpha Results:\n")
  cat("Raw alpha:", round(alpha_result$total$raw_alpha, 4), "\n")
  cat("Standardized alpha:", round(alpha_result$total$std.alpha, 4), "\n")
  cat("Average inter-item correlation:", round(alpha_result$total$average_r, 4), "\n")
  cat("Number of items:", alpha_result$total$nvar, "\n")
  # Item statistics: alpha-if-deleted and corrected item-total correlations
  cat("\nItem Statistics:\n")
  item_stats <- data.frame(
    Item = var_names,
    Alpha_if_deleted = round(alpha_result$alpha.drop$raw_alpha, 4),
    Item_total_correlation = round(alpha_result$item.stats$r.drop, 4),
    Mean = round(alpha_result$item.stats$mean, 4),
    SD = round(alpha_result$item.stats$sd, 4)
  )
  print(item_stats)
  # Check assumptions
  cat("\nAssumption Checks:\n")
  # 1. Check for sufficient sample size (minimum 30 recommended)
  sample_size_ok <- nrow(complete_data) >= 30
  cat("Sample size adequate (≥30):", sample_size_ok, "\n")
  # 2. Check for adequate inter-item correlations (should be > 0.30);
  #    keep only the upper triangle so each pair is counted once
  inter_item_cors <- cor(complete_data)
  inter_item_cors[lower.tri(inter_item_cors)] <- NA
  diag(inter_item_cors) <- NA
  inter_item_cors_flat <- as.vector(inter_item_cors)
  inter_item_cors_flat <- inter_item_cors_flat[!is.na(inter_item_cors_flat)]
  adequate_cors <- sum(inter_item_cors_flat > 0.30) / length(inter_item_cors_flat)
  cat("Proportion of inter-item correlations > 0.30:", round(adequate_cors, 4), "\n")
  # 3. Check for negative correlations (concerning for unidimensionality)
  negative_cors <- sum(inter_item_cors_flat < 0, na.rm = TRUE)
  cat("Number of negative inter-item correlations:", negative_cors, "\n")
  # 4. Check item variances (should be roughly similar)
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat("Ratio of highest to lowest item variance:", round(var_ratio, 4), "\n")
  return(alpha_result)
}
# Calculate Cronbach's alpha for all scales
cat(paste0("CRONBACH'S ALPHA RELIABILITY ANALYSIS",
           "\nData: exp1.csv",
           "\nTotal sample size: ", nrow(data)))

# One entry per scale, in report order: result-variable name -> item
# vector + human-readable label. assign() creates the same eight
# *_alpha objects the rest of the script expects.
scale_specs <- list(
  past_pref_alpha = list(vars = past_pref_vars, label = "Past Preferences"),
  past_pers_alpha = list(vars = past_pers_vars, label = "Past Personality"),
  past_val_alpha  = list(vars = past_val_vars,  label = "Past Values"),
  past_life_alpha = list(vars = past_life_vars, label = "Past Life Satisfaction"),
  fut_pref_alpha  = list(vars = fut_pref_vars,  label = "Future Preferences"),
  fut_pers_alpha  = list(vars = fut_pers_vars,  label = "Future Personality"),
  fut_val_alpha   = list(vars = fut_val_vars,   label = "Future Values"),
  fut_life_alpha  = list(vars = fut_life_vars,  label = "Future Life Satisfaction")
)
for (result_name in names(scale_specs)) {
  spec <- scale_specs[[result_name]]
  assign(result_name, calc_cronbach_alpha(data, spec$vars, spec$label))
}
# Summary table
# BUG FIX: the original used `"="*80`, which is a runtime error in R
# (strings cannot be multiplied); strrep() builds the separator line.
sep80 <- strrep("=", 80)
cat("\n", sep80, "\n")
cat("SUMMARY OF CRONBACH'S ALPHA COEFFICIENTS")
cat("\n", sep80, "\n")

# Result objects in a fixed order matching the Scale labels below.
alpha_objects <- list(past_pref_alpha, past_pers_alpha, past_val_alpha, past_life_alpha,
                      fut_pref_alpha, fut_pers_alpha, fut_val_alpha, fut_life_alpha)

# Rounded raw alpha for one result object, or NA when the analysis was skipped.
extract_alpha <- function(res) {
  if (is.null(res)) NA_real_ else round(res$total$raw_alpha, 4)
}

# Qualitative label for one result object, using the conventional
# 0.9/0.8/0.7/0.6 cutoffs (same thresholds as the interpretation guide).
interpret_alpha <- function(res) {
  if (is.null(res)) return("Insufficient data")
  a <- res$total$raw_alpha
  if (a >= 0.90) "Excellent"
  else if (a >= 0.80) "Good"
  else if (a >= 0.70) "Acceptable"
  else if (a >= 0.60) "Questionable"
  else "Poor"
}

summary_results <- data.frame(
  Scale = c("Past Preferences", "Past Personality", "Past Values", "Past Life Satisfaction",
            "Future Preferences", "Future Personality", "Future Values", "Future Life Satisfaction"),
  Alpha = vapply(alpha_objects, extract_alpha, numeric(1)),
  Items = rep(5, 8),
  Interpretation = vapply(alpha_objects, interpret_alpha, character(1))
)
print(summary_results)
# Save results to CSV
write.csv(summary_results, "cronbach_alpha_summary.csv", row.names = FALSE)
cat("\nSummary results saved to: cronbach_alpha_summary.csv\n")
# BUG FIX: the original used `"="*80`, which is a runtime error in R
# (no string repetition operator); strrep() builds the separator line.
rule80 <- strrep("=", 80)
cat("\n", rule80, "\n")
cat("INTERPRETATION GUIDE FOR CRONBACH'S ALPHA")
cat("\n", rule80, "\n")
cat("α ≥ 0.90: Excellent reliability\n")
cat("α ≥ 0.80: Good reliability\n")
cat("α ≥ 0.70: Acceptable reliability\n")
cat("α ≥ 0.60: Questionable reliability\n")
cat("α < 0.60: Poor reliability\n")

BIN
eohi1/residual_plots.pdf Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -0,0 +1,14 @@
"","eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean","ehi_pref_mean","ehi_pers_mean","ehi_val_mean","ehi_life_mean","ehi_global_mean","AOT_total","CRT_correct","CRT_int"
"eohiDGEN_pref",1,0.27683,0.23964,0.24515,0.66949,0.22538,0.09704,0.08007,0.12507,0.21415,0.06836,0.05361,-0.05819
"eohiDGEN_pers",0.27683,1,0.31901,0.18576,0.70868,0.21513,0.18684,0.13159,0.12199,0.26738,0.0916,0.02566,-0.01732
"eohiDGEN_val",0.23964,0.31901,1,0.13719,0.68261,0.14816,0.12424,0.23448,0.06907,0.21756,0.10396,0.07479,-0.06044
"eohiDGEN_life",0.24515,0.18576,0.13719,1,0.25499,0.02053,0.06528,0.04652,0.29721,0.23854,0.11795,0.03949,-0.0013
"eohiDGEN_mean",0.66949,0.70868,0.68261,0.25499,1,0.26963,0.17831,0.19688,0.1283,0.31052,0.11102,0.06665,-0.05731
"ehi_pref_mean",0.22538,0.21513,0.14816,0.02053,0.26963,1,0.23765,0.19925,0.0625,0.51305,0.0504,0.06892,-0.06473
"ehi_pers_mean",0.09704,0.18684,0.12424,0.06528,0.17831,0.23765,1,0.22369,0.12488,0.5826,0.03132,0.05133,-0.04906
"ehi_val_mean",0.08007,0.13159,0.23448,0.04652,0.19688,0.19925,0.22369,1,0.02752,0.51294,0.01519,0.00599,-0.03791
"ehi_life_mean",0.12507,0.12199,0.06907,0.29721,0.1283,0.0625,0.12488,0.02752,1,0.59399,0.07295,0.01639,-0.00581
"ehi_global_mean",0.21415,0.26738,0.21756,0.23854,0.31052,0.51305,0.5826,0.51294,0.59399,1,0.0849,0.05133,-0.05746
"AOT_total",0.06836,0.0916,0.10396,0.11795,0.11102,0.0504,0.03132,0.01519,0.07295,0.0849,1,0.2609,-0.20963
"CRT_correct",0.05361,0.02566,0.07479,0.03949,0.06665,0.06892,0.05133,0.00599,0.01639,0.05133,0.2609,1,-0.87087
"CRT_int",-0.05819,-0.01732,-0.06044,-0.0013,-0.05731,-0.06473,-0.04906,-0.03791,-0.00581,-0.05746,-0.20963,-0.87087,1
1 eohiDGEN_pref eohiDGEN_pers eohiDGEN_val eohiDGEN_life eohiDGEN_mean ehi_pref_mean ehi_pers_mean ehi_val_mean ehi_life_mean ehi_global_mean AOT_total CRT_correct CRT_int
1 eohiDGEN_pref eohiDGEN_pers eohiDGEN_val eohiDGEN_life eohiDGEN_mean ehi_pref_mean ehi_pers_mean ehi_val_mean ehi_life_mean ehi_global_mean AOT_total CRT_correct CRT_int
2 eohiDGEN_pref 1 0.27683 0.23964 0.24515 0.66949 0.22538 0.09704 0.08007 0.12507 0.21415 0.06836 0.05361 -0.05819
3 eohiDGEN_pers 0.27683 1 0.31901 0.18576 0.70868 0.21513 0.18684 0.13159 0.12199 0.26738 0.0916 0.02566 -0.01732
4 eohiDGEN_val 0.23964 0.31901 1 0.13719 0.68261 0.14816 0.12424 0.23448 0.06907 0.21756 0.10396 0.07479 -0.06044
5 eohiDGEN_life 0.24515 0.18576 0.13719 1 0.25499 0.02053 0.06528 0.04652 0.29721 0.23854 0.11795 0.03949 -0.0013
6 eohiDGEN_mean 0.66949 0.70868 0.68261 0.25499 1 0.26963 0.17831 0.19688 0.1283 0.31052 0.11102 0.06665 -0.05731
7 ehi_pref_mean 0.22538 0.21513 0.14816 0.02053 0.26963 1 0.23765 0.19925 0.0625 0.51305 0.0504 0.06892 -0.06473
8 ehi_pers_mean 0.09704 0.18684 0.12424 0.06528 0.17831 0.23765 1 0.22369 0.12488 0.5826 0.03132 0.05133 -0.04906
9 ehi_val_mean 0.08007 0.13159 0.23448 0.04652 0.19688 0.19925 0.22369 1 0.02752 0.51294 0.01519 0.00599 -0.03791
10 ehi_life_mean 0.12507 0.12199 0.06907 0.29721 0.1283 0.0625 0.12488 0.02752 1 0.59399 0.07295 0.01639 -0.00581
11 ehi_global_mean 0.21415 0.26738 0.21756 0.23854 0.31052 0.51305 0.5826 0.51294 0.59399 1 0.0849 0.05133 -0.05746
12 AOT_total 0.06836 0.0916 0.10396 0.11795 0.11102 0.0504 0.03132 0.01519 0.07295 0.0849 1 0.2609 -0.20963
13 CRT_correct 0.05361 0.02566 0.07479 0.03949 0.06665 0.06892 0.05133 0.00599 0.01639 0.05133 0.2609 1 -0.87087
14 CRT_int -0.05819 -0.01732 -0.06044 -0.0013 -0.05731 -0.06473 -0.04906 -0.03791 -0.00581 -0.05746 -0.20963 -0.87087 1

145
eohi2/EHI reliability.html Normal file
View File

@ -0,0 +1,145 @@
<html><head><title>EHI Reliability Analysis</title></head><body><h1>EHI Reliability Analysis</h1><h2>Cronbach's Alpha</h2><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> raw_alpha </th>
<th style="text-align:right;"> std.alpha </th>
<th style="text-align:right;"> G6(smc) </th>
<th style="text-align:right;"> average_r </th>
<th style="text-align:right;"> S/N </th>
<th style="text-align:right;"> ase </th>
<th style="text-align:right;"> mean </th>
<th style="text-align:right;"> sd </th>
<th style="text-align:right;"> median_r </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> </td>
<td style="text-align:right;"> 0.5709145 </td>
<td style="text-align:right;"> 0.6860723 </td>
<td style="text-align:right;"> 0.6450534 </td>
<td style="text-align:right;"> 0.3533207 </td>
<td style="text-align:right;"> 2.185446 </td>
<td style="text-align:right;"> 0.0229978 </td>
<td style="text-align:right;"> 0.138301 </td>
<td style="text-align:right;"> 0.8228433 </td>
<td style="text-align:right;"> 0.3517941 </td>
</tr>
</tbody>
</table><h2>Split-Half Reliability</h2><p>Maximum split half reliability: 0.77987</p><h2>Item-Level Statistics</h2><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> n </th>
<th style="text-align:right;"> raw.r </th>
<th style="text-align:right;"> std.r </th>
<th style="text-align:right;"> r.cor </th>
<th style="text-align:right;"> r.drop </th>
<th style="text-align:right;"> mean </th>
<th style="text-align:right;"> sd </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> ehiDGEN_5_mean </td>
<td style="text-align:right;"> 489 </td>
<td style="text-align:right;"> 0.7653498 </td>
<td style="text-align:right;"> 0.6789150 </td>
<td style="text-align:right;"> 0.5035473 </td>
<td style="text-align:right;"> 0.4480613 </td>
<td style="text-align:right;"> 0.0620314 </td>
<td style="text-align:right;"> 1.4573526 </td>
</tr>
<tr>
<td style="text-align:left;"> ehiDGEN_10_mean </td>
<td style="text-align:right;"> 489 </td>
<td style="text-align:right;"> 0.8686359 </td>
<td style="text-align:right;"> 0.7381306 </td>
<td style="text-align:right;"> 0.6097580 </td>
<td style="text-align:right;"> 0.5098844 </td>
<td style="text-align:right;"> 0.3006135 </td>
<td style="text-align:right;"> 1.8924455 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi5_global_mean </td>
<td style="text-align:right;"> 489 </td>
<td style="text-align:right;"> 0.5032176 </td>
<td style="text-align:right;"> 0.7114339 </td>
<td style="text-align:right;"> 0.5651807 </td>
<td style="text-align:right;"> 0.3871421 </td>
<td style="text-align:right;"> 0.0675187 </td>
<td style="text-align:right;"> 0.4620107 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi10_global_mean </td>
<td style="text-align:right;"> 489 </td>
<td style="text-align:right;"> 0.5705196 </td>
<td style="text-align:right;"> 0.7420342 </td>
<td style="text-align:right;"> 0.6269733 </td>
<td style="text-align:right;"> 0.4474763 </td>
<td style="text-align:right;"> 0.1230402 </td>
<td style="text-align:right;"> 0.5252241 </td>
</tr>
</tbody>
</table><h3>Alpha if Item Dropped</h3><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> raw_alpha </th>
<th style="text-align:right;"> std.alpha </th>
<th style="text-align:right;"> G6(smc) </th>
<th style="text-align:right;"> average_r </th>
<th style="text-align:right;"> S/N </th>
<th style="text-align:right;"> alpha se </th>
<th style="text-align:right;"> var.r </th>
<th style="text-align:right;"> med.r </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> ehiDGEN_5_mean </td>
<td style="text-align:right;"> 0.4125021 </td>
<td style="text-align:right;"> 0.6576471 </td>
<td style="text-align:right;"> 0.5754825 </td>
<td style="text-align:right;"> 0.3903632 </td>
<td style="text-align:right;"> 1.920963 </td>
<td style="text-align:right;"> 0.0273016 </td>
<td style="text-align:right;"> 0.0105384 </td>
<td style="text-align:right;"> 0.4162485 </td>
</tr>
<tr>
<td style="text-align:left;"> ehiDGEN_10_mean </td>
<td style="text-align:right;"> 0.4091906 </td>
<td style="text-align:right;"> 0.6003996 </td>
<td style="text-align:right;"> 0.5190903 </td>
<td style="text-align:right;"> 0.3337035 </td>
<td style="text-align:right;"> 1.502500 </td>
<td style="text-align:right;"> 0.0345773 </td>
<td style="text-align:right;"> 0.0161841 </td>
<td style="text-align:right;"> 0.2873396 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi5_global_mean </td>
<td style="text-align:right;"> 0.5572211 </td>
<td style="text-align:right;"> 0.6271436 </td>
<td style="text-align:right;"> 0.5440163 </td>
<td style="text-align:right;"> 0.3592479 </td>
<td style="text-align:right;"> 1.681998 </td>
<td style="text-align:right;"> 0.0259296 </td>
<td style="text-align:right;"> 0.0113815 </td>
<td style="text-align:right;"> 0.4162485 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi10_global_mean </td>
<td style="text-align:right;"> 0.5282880 </td>
<td style="text-align:right;"> 0.5963510 </td>
<td style="text-align:right;"> 0.5041975 </td>
<td style="text-align:right;"> 0.3299683 </td>
<td style="text-align:right;"> 1.477400 </td>
<td style="text-align:right;"> 0.0271341 </td>
<td style="text-align:right;"> 0.0068450 </td>
<td style="text-align:right;"> 0.2873396 </td>
</tr>
</tbody>
</table></body></html>

File diff suppressed because it is too large Load Diff

View File

BIN
eohi2/Rplots.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,7 @@
,stdEHI_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
sex_dummy,0.079,,,,,
demo_age_1,-0.22***,-0.028,,,,
edu_num,-0.027,0.0094,-0.078,,,
aot_total,-0.028,-0.072,-0.20***,0.10*,,
crt_correct,0.11*,-0.16***,-0.0044,0.12**,-0.037,
crt_int,-0.12**,0.14**,0.028,-0.11*,-0.011,-0.88***
1 stdEHI_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
1 stdEHI_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
2 sex_dummy 0.079
3 demo_age_1 -0.22*** -0.028
4 edu_num -0.027 0.0094 -0.078
5 aot_total -0.028 -0.072 -0.20*** 0.10*
6 crt_correct 0.11* -0.16*** -0.0044 0.12** -0.037
7 crt_int -0.12** 0.14** 0.028 -0.11* -0.011 -0.88***

View File

@ -0,0 +1,7 @@
,stdEHI_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
sex_dummy,0.080650819,,,,,
demo_age_1,1.40537E-06,0.536013997,,,,
edu_num,0.551130545,0.835590798,0.086262901,,,
aot_total,0.545115793,0.112852544,5.87906E-06,0.025659604,,
crt_correct,0.011928378,0.000483116,0.923101803,0.00884239,0.4122211,
crt_int,0.007552737,0.001816488,0.533287266,0.012949289,0.810348767,9.5293E-160
1 stdEHI_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
1 stdEHI_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
2 sex_dummy 0.080650819
3 demo_age_1 1.40537E-06 0.536013997
4 edu_num 0.551130545 0.835590798 0.086262901
5 aot_total 0.545115793 0.112852544 5.87906E-06 0.025659604
6 crt_correct 0.011928378 0.000483116 0.923101803 0.00884239 0.4122211
7 crt_int 0.007552737 0.001816488 0.533287266 0.012949289 0.810348767 9.5293E-160

View File

@ -0,0 +1,100 @@
# Correlations between the standardized EHI mean and demographic/cognitive
# measures (eohi2 sample). Spearman correlations with unadjusted p-values;
# matrices are written out as CSV for reporting.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
df <- read.csv("eohi2.csv")
# Keep the analysis variables and drop participants who declined to report sex.
data <- df %>%
  filter(demo_sex != "Prefer not to say") %>%
  select(stdEHI_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int)
# Quick data-quality checks: missingness and column classes.
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- as.integer(data$demo_sex == "Female")
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
# ---- Descriptives ----
# Age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Sex (counts and proportions)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Sex dummy
print(table(data$sex_dummy))
# Recode education as an ordered factor, then as numeric 1/2/3 so it can
# enter the (rank-based) correlation matrix.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))
# ---- Correlation matrix ----
numeric_vars <- data %>%
  select(stdEHI_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int)
# Spearman correlations on complete cases.
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))
# Significance tests for the same correlations via psych::corr.test.
library(psych)
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
print(round(cor_test$r, 3))
print(round(cor_test$p, 3))
# Report every off-diagonal pair with its r and p value.
var_names <- colnames(numeric_vars)
for (row_i in seq_len(nrow(cor_test$r))) {
  for (col_j in seq_len(ncol(cor_test$r))) {
    if (row_i != col_j) { # Skip diagonal
      cat(var_names[row_i], "vs", var_names[col_j],
          ": r =", round(cor_test$r[row_i, col_j], 3),
          ", p =", round(cor_test$p[row_i, col_j], 3), "\n")
    }
  }
}
# Summary of just the significant (p < .05) correlations.
sig_idx <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
if (nrow(sig_idx) > 0) {
  for (k in seq_len(nrow(sig_idx))) {
    r_pos <- sig_idx[k, 1]
    c_pos <- sig_idx[k, 2]
    if (r_pos != c_pos) { # Skip diagonal
      cat(var_names[r_pos], "vs", var_names[c_pos],
          ": r =", round(cor_test$r[r_pos, c_pos], 3),
          ", p =", round(cor_test$p[r_pos, c_pos], 3), "\n")
    }
  }
}
# Persist the matrices for the write-up.
write.csv(cor_test$r, "STD_EHI_correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "STD_EHI_correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to STD_EHI_correlation_matrix.csv")
print("P-values saved to STD_EHI_correlation_pvalues.csv")

View File

@ -0,0 +1,63 @@
Variable1,Variable2,Spearman_r,P_value
ehiDGEN_5_Pers,aot_total,-0.136252077,0.002533005
ehi5_global_mean,crt_int,-0.111415172,0.013696124
ehi5_pref_MEAN,crt_int,-0.111269786,0.013820353
ehi5_pref_MEAN,crt_correct,0.092043471,0.041901166
ehi5_global_mean,crt_correct,0.091771139,0.042513801
,,,
ehi10_global_mean,crt_correct,0.086125328,0.057016745
ehi10_global_mean,crt_int,-0.077752437,0.085875618
ehi10_global_mean,aot_total,0.038353442,0.397405642
ehi10_pers_MEAN,crt_int,-0.078358781,0.083451324
ehi10_pers_MEAN,crt_correct,0.062086308,0.170454491
ehi10_pers_MEAN,aot_total,0.008627548,0.849074663
ehi10_pref_MEAN,crt_correct,0.076608495,0.090603647
ehi10_pref_MEAN,crt_int,-0.020948272,0.64400852
ehi10_pref_MEAN,aot_total,-0.006633556,0.883671698
ehi10_val_MEAN,crt_correct,0.05989196,0.186098498
ehi10_val_MEAN,crt_int,-0.057978747,0.200580439
ehi10_val_MEAN,aot_total,-0.003835901,0.932573186
ehi5.10_global_mean,crt_correct,0.033565462,0.458964083
ehi5.10_global_mean,aot_total,-0.031442998,0.487869173
ehi5.10_global_mean,crt_int,-0.023334683,0.606722562
ehi5.10_pers_MEAN,crt_correct,0.030973863,0.494387766
ehi5.10_pers_MEAN,crt_int,-0.02351475,0.603948546
ehi5.10_pers_MEAN,aot_total,-0.006387901,0.887950695
ehi5.10_pref_MEAN,aot_total,-0.012595122,0.781151691
ehi5.10_pref_MEAN,crt_correct,-0.011596204,0.798118255
ehi5.10_pref_MEAN,crt_int,0.00927269,0.837939503
ehi5.10_val_MEAN,crt_int,-0.02343742,0.60513915
ehi5.10_val_MEAN,aot_total,-0.01868822,0.680165418
ehi5.10_val_MEAN,crt_correct,0.016882557,0.709594395
ehi5_global_mean,aot_total,-0.012144664,0.788790373
ehi5_pers_MEAN,crt_correct,0.041879574,0.35541717
ehi5_pers_MEAN,crt_int,-0.038984568,0.38967884
ehi5_pers_MEAN,aot_total,-0.002320273,0.959183882
ehi5_pref_MEAN,aot_total,-0.009156402,0.839944313
ehi5_val_MEAN,crt_int,-0.060691086,0.180283214
ehi5_val_MEAN,aot_total,-0.043640227,0.335537257
ehi5_val_MEAN,crt_correct,0.038349744,0.397451186
ehiDGEN_10_mean,crt_int,-0.060692684,0.180271722
ehiDGEN_10_mean,crt_correct,0.048693261,0.282530257
ehiDGEN_10_mean,aot_total,-0.007003919,0.877226879
ehiDGEN_10_Pers,crt_int,-0.08321639,0.065963243
ehiDGEN_10_Pers,crt_correct,0.062811394,0.165507368
ehiDGEN_10_Pers,aot_total,-0.005213733,0.908446087
ehiDGEN_10_Pref,crt_correct,0.056920022,0.208938511
ehiDGEN_10_Pref,crt_int,-0.054154647,0.231949195
ehiDGEN_10_Pref,aot_total,0.012229383,0.787352177
ehiDGEN_10_Val,crt_correct,-0.046875479,0.300907385
ehiDGEN_10_Val,aot_total,-0.041722577,0.35722513
ehiDGEN_10_Val,crt_int,0.029931685,0.509033307
ehiDGEN_5_mean,aot_total,-0.068430961,0.130753688
ehiDGEN_5_mean,crt_correct,0.047639811,0.293085717
ehiDGEN_5_mean,crt_int,-0.042472828,0.348637399
ehiDGEN_5_Pers,crt_correct,0.061428736,0.175035746
ehiDGEN_5_Pers,crt_int,-0.038746148,0.392587023
ehiDGEN_5_Pref,crt_int,-0.042178047,0.351995885
ehiDGEN_5_Pref,aot_total,-0.038691604,0.393254174
ehiDGEN_5_Pref,crt_correct,0.033401162,0.461166903
ehiDGEN_5_Val,aot_total,-0.080834838,0.074119126
ehiDGEN_5_Val,crt_int,-0.024116805,0.594715088
ehiDGEN_5_Val,crt_correct,0.02187202,0.629462582
,,,
1 Variable1 Variable2 Spearman_r P_value
1 Variable1 Variable2 Spearman_r P_value
2 ehiDGEN_5_Pers aot_total -0.136252077 0.002533005
3 ehi5_global_mean crt_int -0.111415172 0.013696124
4 ehi5_pref_MEAN crt_int -0.111269786 0.013820353
5 ehi5_pref_MEAN crt_correct 0.092043471 0.041901166
6 ehi5_global_mean crt_correct 0.091771139 0.042513801
7
8 ehi10_global_mean crt_correct 0.086125328 0.057016745
9 ehi10_global_mean crt_int -0.077752437 0.085875618
10 ehi10_global_mean aot_total 0.038353442 0.397405642
11 ehi10_pers_MEAN crt_int -0.078358781 0.083451324
12 ehi10_pers_MEAN crt_correct 0.062086308 0.170454491
13 ehi10_pers_MEAN aot_total 0.008627548 0.849074663
14 ehi10_pref_MEAN crt_correct 0.076608495 0.090603647
15 ehi10_pref_MEAN crt_int -0.020948272 0.64400852
16 ehi10_pref_MEAN aot_total -0.006633556 0.883671698
17 ehi10_val_MEAN crt_correct 0.05989196 0.186098498
18 ehi10_val_MEAN crt_int -0.057978747 0.200580439
19 ehi10_val_MEAN aot_total -0.003835901 0.932573186
20 ehi5.10_global_mean crt_correct 0.033565462 0.458964083
21 ehi5.10_global_mean aot_total -0.031442998 0.487869173
22 ehi5.10_global_mean crt_int -0.023334683 0.606722562
23 ehi5.10_pers_MEAN crt_correct 0.030973863 0.494387766
24 ehi5.10_pers_MEAN crt_int -0.02351475 0.603948546
25 ehi5.10_pers_MEAN aot_total -0.006387901 0.887950695
26 ehi5.10_pref_MEAN aot_total -0.012595122 0.781151691
27 ehi5.10_pref_MEAN crt_correct -0.011596204 0.798118255
28 ehi5.10_pref_MEAN crt_int 0.00927269 0.837939503
29 ehi5.10_val_MEAN crt_int -0.02343742 0.60513915
30 ehi5.10_val_MEAN aot_total -0.01868822 0.680165418
31 ehi5.10_val_MEAN crt_correct 0.016882557 0.709594395
32 ehi5_global_mean aot_total -0.012144664 0.788790373
33 ehi5_pers_MEAN crt_correct 0.041879574 0.35541717
34 ehi5_pers_MEAN crt_int -0.038984568 0.38967884
35 ehi5_pers_MEAN aot_total -0.002320273 0.959183882
36 ehi5_pref_MEAN aot_total -0.009156402 0.839944313
37 ehi5_val_MEAN crt_int -0.060691086 0.180283214
38 ehi5_val_MEAN aot_total -0.043640227 0.335537257
39 ehi5_val_MEAN crt_correct 0.038349744 0.397451186
40 ehiDGEN_10_mean crt_int -0.060692684 0.180271722
41 ehiDGEN_10_mean crt_correct 0.048693261 0.282530257
42 ehiDGEN_10_mean aot_total -0.007003919 0.877226879
43 ehiDGEN_10_Pers crt_int -0.08321639 0.065963243
44 ehiDGEN_10_Pers crt_correct 0.062811394 0.165507368
45 ehiDGEN_10_Pers aot_total -0.005213733 0.908446087
46 ehiDGEN_10_Pref crt_correct 0.056920022 0.208938511
47 ehiDGEN_10_Pref crt_int -0.054154647 0.231949195
48 ehiDGEN_10_Pref aot_total 0.012229383 0.787352177
49 ehiDGEN_10_Val crt_correct -0.046875479 0.300907385
50 ehiDGEN_10_Val aot_total -0.041722577 0.35722513
51 ehiDGEN_10_Val crt_int 0.029931685 0.509033307
52 ehiDGEN_5_mean aot_total -0.068430961 0.130753688
53 ehiDGEN_5_mean crt_correct 0.047639811 0.293085717
54 ehiDGEN_5_mean crt_int -0.042472828 0.348637399
55 ehiDGEN_5_Pers crt_correct 0.061428736 0.175035746
56 ehiDGEN_5_Pers crt_int -0.038746148 0.392587023
57 ehiDGEN_5_Pref crt_int -0.042178047 0.351995885
58 ehiDGEN_5_Pref aot_total -0.038691604 0.393254174
59 ehiDGEN_5_Pref crt_correct 0.033401162 0.461166903
60 ehiDGEN_5_Val aot_total -0.080834838 0.074119126
61 ehiDGEN_5_Val crt_int -0.024116805 0.594715088
62 ehiDGEN_5_Val crt_correct 0.02187202 0.629462582
63

View File

@ -0,0 +1,14 @@
,ehiDGEN_5_mean,ehiDGEN_10_mean,ehi5_global_mean,ehi10_global_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
ehiDGEN_10_mean,0.42***,,,,,,,,
ehi5_global_mean,0.21***,0.21***,,,,,,,
ehi10_global_mean,0.19***,0.37***,0.43***,,,,,,
sex_dummy,0.074,0.053,0.023,0.051,,,,,
demo_age_1,-0.053,-0.22***,-0.21***,-0.26***,-0.028,,,,
edu_num,-0.031,-0.037,0.083,-0.073,0.0094,-0.078,,,
aot_total,-0.064,-0.0067,-0.0032,0.050,-0.072,-0.20***,0.10*,,
crt_correct,0.045,0.053,0.093*,0.090*,-0.16***,-0.0044,0.12**,-0.037,
crt_int,-0.041,-0.065,-0.11*,-0.083,0.14*,0.028,-0.11*,-0.011,-0.88***
,,,,,,,,,
*p<0.05,,,,,,,,,
**p<0.01,,,,,,,,,
***p<0.001,,,,,,,,,
1 ehiDGEN_5_mean ehiDGEN_10_mean ehi5_global_mean ehi10_global_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
1 ehiDGEN_5_mean ehiDGEN_10_mean ehi5_global_mean ehi10_global_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
2 ehiDGEN_10_mean 0.42***
3 ehi5_global_mean 0.21*** 0.21***
4 ehi10_global_mean 0.19*** 0.37*** 0.43***
5 sex_dummy 0.074 0.053 0.023 0.051
6 demo_age_1 -0.053 -0.22*** -0.21*** -0.26*** -0.028
7 edu_num -0.031 -0.037 0.083 -0.073 0.0094 -0.078
8 aot_total -0.064 -0.0067 -0.0032 0.050 -0.072 -0.20*** 0.10*
9 crt_correct 0.045 0.053 0.093* 0.090* -0.16*** -0.0044 0.12** -0.037
10 crt_int -0.041 -0.065 -0.11* -0.083 0.14* 0.028 -0.11* -0.011 -0.88***
11
12 *p<0.05
13 **p<0.01
14 ***p<0.001

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,10 @@
,ehiDGEN_5_mean,ehiDGEN_10_mean,ehi5_global_mean,ehi10_global_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
ehiDGEN_10_mean,2.9929E-22,,,,,,,,
ehi5_global_mean,4.14491E-06,3.02E-06,,,,,,,
ehi10_global_mean,1.61573E-05,1.66719E-17,1.63546E-23,,,,,,
sex_dummy,0.104170333,0.245085093,0.606827914,0.260991884,,,,,
demo_age_1,0.239362544,1.57395E-06,4.57184E-06,5.31327E-09,0.536013997,,,,
edu_num,0.489059792,0.41328311,0.067372333,0.10843647,0.835590798,0.086262901,,,
aot_total,0.156912095,0.882372703,0.943759329,0.269428383,0.112852544,5.87906E-06,0.025659604,,
crt_correct,0.319158511,0.24026811,0.039792024,0.047593264,0.000483116,0.923101803,0.00884239,0.4122211,
crt_int,0.365601388,0.150705909,0.01133869,0.065890723,0.001816488,0.533287266,0.012949289,0.810348767,9.5293E-160
1 ehiDGEN_5_mean ehiDGEN_10_mean ehi5_global_mean ehi10_global_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
1 ehiDGEN_5_mean ehiDGEN_10_mean ehi5_global_mean ehi10_global_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
2 ehiDGEN_10_mean 2.9929E-22
3 ehi5_global_mean 4.14491E-06 3.02E-06
4 ehi10_global_mean 1.61573E-05 1.66719E-17 1.63546E-23
5 sex_dummy 0.104170333 0.245085093 0.606827914 0.260991884
6 demo_age_1 0.239362544 1.57395E-06 4.57184E-06 5.31327E-09 0.536013997
7 edu_num 0.489059792 0.41328311 0.067372333 0.10843647 0.835590798 0.086262901
8 aot_total 0.156912095 0.882372703 0.943759329 0.269428383 0.112852544 5.87906E-06 0.025659604
9 crt_correct 0.319158511 0.24026811 0.039792024 0.047593264 0.000483116 0.923101803 0.00884239 0.4122211
10 crt_int 0.365601388 0.150705909 0.01133869 0.065890723 0.001816488 0.533287266 0.012949289 0.810348767 9.5293E-160

View File

@ -0,0 +1,34 @@
Variable1,Variable2,Spearman_r,P_value
DGEN_10_global_mean,aot_total,0.393039545,0
DGEN_5_global_mean,aot_total,0.401732294,0
DGEN_fut_10_mean,aot_total,0.372705919,0
DGEN_fut_5_mean,aot_total,0.400202502,0
DGEN_past_10_mean,aot_total,0.368847521,0
DGEN_past_5_mean,aot_total,0.376820454,0
DGENfut_global_mean,aot_total,0.382481063,0
DGENpast_global_mean,aot_total,0.369258554,0
DGEN_fut_5_mean,crt_correct,-0.08823341,0.051182461
DGENfut_global_mean,crt_correct,-0.086607229,0.05563658
DGEN_5_global_mean,crt_correct,-0.079087177,0.080612427
DGEN_fut_10_mean,crt_correct,-0.073786402,0.103162718
DGEN_fut_5.10_mean,aot_total,0.06505662,0.150871574
DGEN_10_global_mean,crt_correct,-0.060769258,0.17972166
DGEN_5.10_global_mean,aot_total,0.058102985,0.199615824
DGEN_past_5_mean,crt_correct,-0.057080059,0.207659218
DGEN_fut_5.10_mean,crt_correct,-0.052644919,0.245242874
DGEN_past_5.10_mean,aot_total,0.050343754,0.266514845
DGEN_fut_5.10_mean,crt_int,0.049423543,0.275365524
DGENpast_global_mean,crt_correct,-0.048081298,0.288630365
DGEN_past_10_mean,crt_int,-0.042776663,0.345197086
DGEN_5.10_global_mean,crt_correct,-0.036910124,0.415418734
DGENpast_global_mean,crt_int,-0.035697633,0.430916171
DGEN_past_10_mean,crt_correct,-0.035562484,0.432664043
DGEN_past_5_mean,crt_int,-0.031740856,0.483754572
DGEN_10_global_mean,crt_int,-0.019205703,0.671817818
DGEN_past_5.10_mean,crt_int,-0.015826279,0.727015617
DGEN_5_global_mean,crt_int,-0.014881366,0.742720553
DGEN_past_5.10_mean,crt_correct,-0.013651301,0.763324994
DGEN_5.10_global_mean,crt_int,0.011869586,0.793465063
DGENfut_global_mean,crt_int,0.009764448,0.829473295
DGEN_fut_10_mean,crt_int,0.002368134,0.95834271
DGEN_fut_5_mean,crt_int,-0.0005323,0.990632398
1 Variable1 Variable2 Spearman_r P_value
1 Variable1 Variable2 Spearman_r P_value
2 DGEN_10_global_mean aot_total 0.393039545 0
3 DGEN_5_global_mean aot_total 0.401732294 0
4 DGEN_fut_10_mean aot_total 0.372705919 0
5 DGEN_fut_5_mean aot_total 0.400202502 0
6 DGEN_past_10_mean aot_total 0.368847521 0
7 DGEN_past_5_mean aot_total 0.376820454 0
8 DGENfut_global_mean aot_total 0.382481063 0
9 DGENpast_global_mean aot_total 0.369258554 0
10 DGEN_fut_5_mean crt_correct -0.08823341 0.051182461
11 DGENfut_global_mean crt_correct -0.086607229 0.05563658
12 DGEN_5_global_mean crt_correct -0.079087177 0.080612427
13 DGEN_fut_10_mean crt_correct -0.073786402 0.103162718
14 DGEN_fut_5.10_mean aot_total 0.06505662 0.150871574
15 DGEN_10_global_mean crt_correct -0.060769258 0.17972166
16 DGEN_5.10_global_mean aot_total 0.058102985 0.199615824
17 DGEN_past_5_mean crt_correct -0.057080059 0.207659218
18 DGEN_fut_5.10_mean crt_correct -0.052644919 0.245242874
19 DGEN_past_5.10_mean aot_total 0.050343754 0.266514845
20 DGEN_fut_5.10_mean crt_int 0.049423543 0.275365524
21 DGENpast_global_mean crt_correct -0.048081298 0.288630365
22 DGEN_past_10_mean crt_int -0.042776663 0.345197086
23 DGEN_5.10_global_mean crt_correct -0.036910124 0.415418734
24 DGENpast_global_mean crt_int -0.035697633 0.430916171
25 DGEN_past_10_mean crt_correct -0.035562484 0.432664043
26 DGEN_past_5_mean crt_int -0.031740856 0.483754572
27 DGEN_10_global_mean crt_int -0.019205703 0.671817818
28 DGEN_past_5.10_mean crt_int -0.015826279 0.727015617
29 DGEN_5_global_mean crt_int -0.014881366 0.742720553
30 DGEN_past_5.10_mean crt_correct -0.013651301 0.763324994
31 DGEN_5.10_global_mean crt_int 0.011869586 0.793465063
32 DGENfut_global_mean crt_int 0.009764448 0.829473295
33 DGEN_fut_10_mean crt_int 0.002368134 0.95834271
34 DGEN_fut_5_mean crt_int -0.0005323 0.990632398

View File

@ -0,0 +1,175 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Load required libraries
library(corrplot)
library(Hmisc)
library(psych)
# Load the data
exp2_data <- read.csv("eohi2.csv")
# Define the two sets of variables
set1_vars <- c("DGEN_past_5_mean", "DGEN_past_10_mean", "DGEN_fut_5_mean", "DGEN_fut_10_mean",
"DGEN_past_5.10_mean", "DGEN_fut_5.10_mean", "DGENpast_global_mean",
"DGENfut_global_mean", "DGEN_5_global_mean", "DGEN_10_global_mean", "DGEN_5.10_global_mean")
set2_vars <- c("aot_total", "crt_correct", "crt_int")
# Create subset with only the variables of interest
correlation_data <- exp2_data[, c(set1_vars, set2_vars)]
# ===== NORMALITY CHECKS =====
# Shapiro-Wilk tests for normality (n < 5000)
for(var in names(correlation_data)) {
if(length(na.omit(correlation_data[[var]])) <= 5000) {
shapiro_result <- shapiro.test(correlation_data[[var]])
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
var, shapiro_result$p.value,
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
}
}
# Visual normality checks
pdf("normality_plots_domain_general_vars.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))
for(var in names(correlation_data)) {
# Histogram with normal curve overlay
hist(correlation_data[[var]], main = paste("Histogram:", var),
xlab = var, freq = FALSE)
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
sd = sd(correlation_data[[var]], na.rm = TRUE)),
add = TRUE, col = "red", lwd = 2)
# Q-Q plot
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
qqline(correlation_data[[var]], col = "red", lwd = 2)
}
dev.off()
# ===== LINEARITY CHECKS =====
# Check linearity between variable pairs
pdf("linearity_plots_domain_general_vars.pdf", width = 15, height = 10)
par(mfrow = c(4, 3))
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
# Scatter plot with regression line
plot(correlation_data[[var1]], correlation_data[[var2]],
main = paste(var1, "vs", var2),
xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black")
# Add linear regression line
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
abline(lm_fit, col = "red", lwd = 2)
# Add LOESS smooth line for non-linear pattern detection
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
loess_pred <- predict(loess_fit, x_seq)
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
# Calculate R-squared for linear fit
r_squared <- summary(lm_fit)$r.squared
cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared))
}
}
dev.off()
# Residual analysis for linearity
pdf("residual_plots_domain_general_vars.pdf", width = 15, height = 10)
par(mfrow = c(4, 3))
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
residuals <- residuals(lm_fit)
fitted <- fitted(lm_fit)
plot(fitted, residuals,
main = paste("Residuals:", var1, "vs", var2),
xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black")
abline(h = 0, col = "red", lwd = 2)
# Add smooth line to residuals
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
}
}
dev.off()
# Calculate Spearman correlation matrix only
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
# Print correlation matrix with 5 decimal places
print(round(cor_matrix_spearman, 5))
# Separate correlations between the two sets (Spearman)
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))
# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
# Statistical significance tests (Spearman)
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
p_val <- cor_test_results_spearman$P[var1, var2]
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
}
}
# Create correlation plot for Spearman only
pdf("correlation_plot_domain_general_vars_spearman.pdf", width = 10, height = 8)
corrplot(cor_matrix_spearman, method = "color", type = "upper",
order = "hclust", tl.cex = 0.8, tl.col = "black",
addCoef.col = "black", number.cex = 0.7,
title = "Spearman Correlation Matrix: Domain-General Vars vs Cognitive Measures")
dev.off()
# Summary statistics
desc_stats <- describe(correlation_data)
print(round(desc_stats, 5))
# Save results to CSV files
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_general_vars.csv")
write.csv(round(desc_stats, 5), "descriptive_statistics_domain_general_vars.csv")
# Save correlation results in a formatted table
cor_results <- data.frame(
Variable1 = character(),
Variable2 = character(),
Spearman_r = numeric(),
P_value = numeric(),
stringsAsFactors = FALSE
)
# Extract correlations between sets
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
r_val <- cor_matrix_spearman[var1, var2]
p_val <- cor_test_results_spearman$P[var1, var2]
cor_results <- rbind(cor_results, data.frame(
Variable1 = var1,
Variable2 = var2,
Spearman_r = r_val,
P_value = p_val,
stringsAsFactors = FALSE
))
}
}
write.csv(cor_results, "correlations - domain general vars.csv", row.names = FALSE)

View File

@ -0,0 +1,34 @@
Variable1,Variable2,Spearman_r,P_value
N10_global_mean,aot_total,0.150441363,0.000846088
N5_global_mean,aot_total,0.185910923,3.52323E-05
NFut_10_mean,aot_total,0.155140023,0.000575829
NFut_5_mean,aot_total,0.199536342,8.75208E-06
NFut_global_mean,aot_total,0.185343328,3.72583E-05
NPast_10_mean,aot_total,0.143227088,0.001495728
NPast_5_mean,aot_total,0.150454018,0.000845224
NPast_global_mean,aot_total,0.148341676,0.001001318
X5.10_global_mean,aot_total,0.206551087,4.11273E-06
X5.10fut_mean,aot_total,0.208127536,3.45821E-06
X5.10past_mean,aot_total,0.168898536,0.000175189
NFut_5_mean,crt_correct,-0.055312717,0.222103555
NPast_5_mean,crt_int,-0.053658543,0.236260102
NPast_global_mean,crt_int,-0.043976695,0.33182097
NPast_10_mean,crt_int,-0.043338736,0.33888983
NFut_global_mean,crt_correct,-0.036977473,0.414567649
X5.10fut_mean,crt_int,-0.030131358,0.506209892
NPast_5_mean,crt_correct,0.028880988,0.524024755
NFut_5_mean,crt_int,0.027093379,0.550039031
X5.10_global_mean,crt_int,-0.025449064,0.574514939
NPast_10_mean,crt_correct,0.021023332,0.642821387
NPast_global_mean,crt_correct,0.020816029,0.646102235
NFut_10_mean,crt_correct,-0.020486277,0.651335162
X5.10past_mean,crt_int,-0.018645297,0.680859581
N5_global_mean,crt_correct,-0.018075161,0.690105495
N10_global_mean,crt_int,-0.016544862,0.71514808
NFut_10_mean,crt_int,-0.009395086,0.835830518
NFut_global_mean,crt_int,0.008419817,0.852666611
N10_global_mean,crt_correct,-0.008133995,0.857613839
N5_global_mean,crt_int,-0.007330571,0.87154938
X5.10fut_mean,crt_correct,-0.005165193,0.909294754
X5.10_global_mean,crt_correct,-0.004538036,0.920269031
X5.10past_mean,crt_correct,0.003021813,0.94685912
1 Variable1 Variable2 Spearman_r P_value
1 Variable1 Variable2 Spearman_r P_value
2 N10_global_mean aot_total 0.150441363 0.000846088
3 N5_global_mean aot_total 0.185910923 3.52323E-05
4 NFut_10_mean aot_total 0.155140023 0.000575829
5 NFut_5_mean aot_total 0.199536342 8.75208E-06
6 NFut_global_mean aot_total 0.185343328 3.72583E-05
7 NPast_10_mean aot_total 0.143227088 0.001495728
8 NPast_5_mean aot_total 0.150454018 0.000845224
9 NPast_global_mean aot_total 0.148341676 0.001001318
10 X5.10_global_mean aot_total 0.206551087 4.11273E-06
11 X5.10fut_mean aot_total 0.208127536 3.45821E-06
12 X5.10past_mean aot_total 0.168898536 0.000175189
13 NFut_5_mean crt_correct -0.055312717 0.222103555
14 NPast_5_mean crt_int -0.053658543 0.236260102
15 NPast_global_mean crt_int -0.043976695 0.33182097
16 NPast_10_mean crt_int -0.043338736 0.33888983
17 NFut_global_mean crt_correct -0.036977473 0.414567649
18 X5.10fut_mean crt_int -0.030131358 0.506209892
19 NPast_5_mean crt_correct 0.028880988 0.524024755
20 NFut_5_mean crt_int 0.027093379 0.550039031
21 X5.10_global_mean crt_int -0.025449064 0.574514939
22 NPast_10_mean crt_correct 0.021023332 0.642821387
23 NPast_global_mean crt_correct 0.020816029 0.646102235
24 NFut_10_mean crt_correct -0.020486277 0.651335162
25 X5.10past_mean crt_int -0.018645297 0.680859581
26 N5_global_mean crt_correct -0.018075161 0.690105495
27 N10_global_mean crt_int -0.016544862 0.71514808
28 NFut_10_mean crt_int -0.009395086 0.835830518
29 NFut_global_mean crt_int 0.008419817 0.852666611
30 N10_global_mean crt_correct -0.008133995 0.857613839
31 N5_global_mean crt_int -0.007330571 0.87154938
32 X5.10fut_mean crt_correct -0.005165193 0.909294754
33 X5.10_global_mean crt_correct -0.004538036 0.920269031
34 X5.10past_mean crt_correct 0.003021813 0.94685912

View File

@ -0,0 +1,197 @@
options(scipen = 999)  # suppress scientific notation in printed/written output
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Load required libraries
library(corrplot)
library(Hmisc)
# NOTE(review): psych is loaded after Hmisc; both export describe(), so
# psych::describe() is the one used later in this script -- confirm intended
library(psych)
# Load the data
exp2_data <- read.csv("eohi2.csv")
# Define the two sets of variables
# Set 1: rating-scale mean columns; Set 2: cognitive measures (presumably AOT
# total and CRT correct/intuitive scores -- verify against the codebook)
set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean",
"X5.10past_mean", "X5.10fut_mean", "NPast_global_mean",
"NFut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean")
set2_vars <- c("aot_total", "crt_correct", "crt_int")
# Create subset with only the variables of interest
correlation_data <- exp2_data[, c(set1_vars, set2_vars)]
# ===== NORMALITY CHECKS =====
# Shapiro-Wilk tests for normality (the test is only defined for n < 5000)
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  if (length(na.omit(vals)) <= 5000) {
    sw <- shapiro.test(vals)
    verdict <- if (sw$p.value < 0.05) "(NOT normal)" else "(normal)"
    cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", v, sw$p.value, verdict))
  }
}
# Kolmogorov-Smirnov test for normality, against a normal distribution with
# the sample's own mean and SD
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  ks <- ks.test(vals, "pnorm",
                mean = mean(vals, na.rm = TRUE),
                sd = sd(vals, na.rm = TRUE))
  verdict <- if (ks$p.value < 0.05) "(NOT normal)" else "(normal)"
  cat(sprintf("%s: KS p = %.5f %s\n", v, ks$p.value, verdict))
}
# Visual normality checks: for each variable, a density histogram with the
# fitted normal curve overlaid, followed by a Q-Q plot
pdf("normality_plots_domain_vars.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  hist(vals, main = paste("Histogram:", v), xlab = v, freq = FALSE)
  curve(dnorm(x, mean = mean(vals, na.rm = TRUE),
              sd = sd(vals, na.rm = TRUE)),
        add = TRUE, col = "red", lwd = 2)
  qqnorm(vals, main = paste("Q-Q Plot:", v))
  qqline(vals, col = "red", lwd = 2)
}
dev.off()
# ===== LINEARITY CHECKS =====
# Scatter plot for every set1 x set2 pair with a linear fit (red, solid) and a
# LOESS smooth (blue, dashed); divergence between the two lines suggests a
# non-linear relationship. The linear R^2 for each pair is printed as well.
pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10)
par(mfrow = c(4, 3))
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    # Scatter plot with regression line
    plot(correlation_data[[var1]], correlation_data[[var2]],
         main = paste(var1, "vs", var2),
         xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black")
    # Add linear regression line
    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
    abline(lm_fit, col = "red", lwd = 2)
    # Add LOESS smooth line for non-linear pattern detection
    loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
    x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
                 max(correlation_data[[var1]], na.rm = TRUE), length = 100)
    loess_pred <- predict(loess_fit, x_seq)
    lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
    # Calculate R-squared for linear fit
    r_squared <- summary(lm_fit)$r.squared
    cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared))
  }
}
dev.off()
# Residual-vs-fitted plots for the same pairs; curvature in the blue lowess
# line indicates the linearity assumption is violated.
pdf("residual_plots_domain_vars.pdf", width = 15, height = 10)
par(mfrow = c(4, 3))
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
    # FIX: these locals were named 'residuals' and 'fitted', shadowing the
    # base R generics residuals() and fitted() for the rest of the session.
    resid_vals <- residuals(lm_fit)
    fitted_vals <- fitted(lm_fit)
    plot(fitted_vals, resid_vals,
         main = paste("Residuals:", var1, "vs", var2),
         xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black")
    abline(h = 0, col = "red", lwd = 2)
    # Add smooth line to residuals
    lines(lowess(fitted_vals, resid_vals), col = "blue", lwd = 2)
  }
}
dev.off()
# Calculate correlation matrices (both Pearson and Spearman).
# FIX: cor() defaults to use = "everything", so a single NA anywhere in a
# column would turn that column's entire row/column of the matrix into NA.
# Pairwise deletion also matches how Hmisc::rcorr() computes p-values below.
cor_matrix_pearson <- cor(correlation_data, method = "pearson", use = "pairwise.complete.obs")
cor_matrix_spearman <- cor(correlation_data, method = "spearman", use = "pairwise.complete.obs")
# Use Spearman as primary method
cor_matrix <- cor_matrix_spearman
# Print correlation matrices with 5 decimal places
print(round(cor_matrix_spearman, 5))
# Separate correlations between the two sets (Spearman)
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))
# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
# Statistical significance tests (Spearman); rcorr() returns a $P matrix of
# pairwise p-values (pairwise deletion of missing observations)
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    p_val <- cor_test_results_spearman$P[var1, var2]
    cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
  }
}
# Create correlation plots for both methods
pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8)
corrplot(cor_matrix_spearman, method = "color", type = "upper",
         order = "hclust", tl.cex = 0.8, tl.col = "black",
         addCoef.col = "black", number.cex = 0.7,
         title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures")
dev.off()
pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8)
corrplot(cor_matrix_pearson, method = "color", type = "upper",
         order = "hclust", tl.cex = 0.8, tl.col = "black",
         addCoef.col = "black", number.cex = 0.7,
         title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures")
dev.off()
# Summary statistics (describe() -- psych masks Hmisc here given load order)
desc_stats <- describe(correlation_data)
print(round(desc_stats, 5))
# Save results to CSV files
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv")
write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv")
write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv")
# Save the set1-vs-set2 Spearman correlations and their p-values as a
# long-format table (one row per variable pair) and write it to CSV.
# Rows are accumulated in a pre-sized list and bound once with do.call(rbind);
# the original grew the data.frame with rbind() inside the double loop, which
# is quadratic in the number of rows. Output rows keep the same order
# (outer loop over set1_vars, inner loop over set2_vars).
row_list <- vector("list", length(set1_vars) * length(set2_vars))
k <- 0
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    k <- k + 1
    row_list[[k]] <- data.frame(
      Variable1 = var1,
      Variable2 = var2,
      Spearman_r = cor_matrix_spearman[var1, var2],
      P_value = cor_test_results_spearman$P[var1, var2],
      stringsAsFactors = FALSE
    )
  }
}
cor_results <- do.call(rbind, row_list)
write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE)

View File

@ -0,0 +1,176 @@
options(scipen = 999)  # suppress scientific notation in printed/written output
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Load required libraries
library(corrplot)
library(Hmisc)
# NOTE(review): psych is loaded after Hmisc; both export describe(), so
# psych::describe() is the one used later in this script -- confirm intended
library(psych)
# Load the data
# NOTE(review): variable is named exp1_data but the file read is eohi2.csv --
# confirm the intended experiment/file pairing
exp1_data <- read.csv("eohi2.csv")
# Define the two sets of variables
# Set 1: EOHI/DGEN scale columns; Set 2: cognitive measures
set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val",
"ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val",
"ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN",
"ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN",
"ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN",
"ehiDGEN_5_mean", "ehiDGEN_10_mean",
"ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean")
set2_vars <- c("aot_total", "crt_correct", "crt_int")
# Create subset with only the variables of interest
correlation_data <- exp1_data[, c(set1_vars, set2_vars)]
# ===== NORMALITY CHECKS =====
# Shapiro-Wilk test for every analysis variable
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  sw <- shapiro.test(vals)
  verdict <- if (sw$p.value < 0.05) "(NOT normal)" else "(normal)"
  cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", v, sw$p.value, verdict))
}
# Visual normality checks: for each variable, a density histogram with the
# fitted normal curve overlaid, followed by a Q-Q plot
pdf("normality_plots.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  hist(vals, main = paste("Histogram:", v), xlab = v, freq = FALSE)
  curve(dnorm(x, mean = mean(vals, na.rm = TRUE),
              sd = sd(vals, na.rm = TRUE)),
        add = TRUE, col = "red", lwd = 2)
  qqnorm(vals, main = paste("Q-Q Plot:", v))
  qqline(vals, col = "red", lwd = 2)
}
dev.off()
# ===== LINEARITY CHECKS =====
# Scatter plot for every set1 x set2 pair with a linear fit (red, solid) and a
# LOESS smooth (blue, dashed); divergence between the two lines suggests a
# non-linear relationship. The linear R^2 for each pair is printed as well.
pdf("linearity_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    # Scatter plot with regression line
    plot(correlation_data[[var1]], correlation_data[[var2]],
         main = paste(var1, "vs", var2),
         xlab = var1, ylab = var2, pch = 16, cex = 0.6)
    # Add linear regression line
    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
    abline(lm_fit, col = "red", lwd = 2)
    # Add LOESS smooth line for non-linear pattern detection
    loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
    x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
                 max(correlation_data[[var1]], na.rm = TRUE), length = 100)
    loess_pred <- predict(loess_fit, x_seq)
    lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
    # Calculate R-squared for linear fit
    r_squared <- summary(lm_fit)$r.squared
    cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
  }
}
dev.off()
# Residual-vs-fitted plots for the same pairs; curvature in the blue lowess
# line indicates the linearity assumption is violated.
pdf("residual_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
    # FIX: these locals were named 'residuals' and 'fitted', shadowing the
    # base R generics residuals() and fitted() for the rest of the session.
    resid_vals <- residuals(lm_fit)
    fitted_vals <- fitted(lm_fit)
    plot(fitted_vals, resid_vals,
         main = paste("Residuals:", var1, "vs", var2),
         xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
    abline(h = 0, col = "red", lwd = 2)
    # Add smooth line to residuals
    lines(lowess(fitted_vals, resid_vals), col = "blue", lwd = 2)
  }
}
dev.off()
# Calculate correlation matrix (Spearman only).
# FIX: cor() defaults to use = "everything", so a single NA anywhere in a
# column would turn that column's entire row/column of the matrix into NA.
# Pairwise deletion also matches how Hmisc::rcorr() computes p-values below.
cor_matrix_spearman <- cor(correlation_data, method = "spearman", use = "pairwise.complete.obs")
# Print correlation matrix with 5 decimal places
print(round(cor_matrix_spearman, 5))
# Separate correlations between the two sets (Spearman)
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))
# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
# Statistical significance tests (Spearman); rcorr() returns a $P matrix of
# pairwise p-values (pairwise deletion of missing observations)
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    p_val <- cor_test_results_spearman$P[var1, var2]
    cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
  }
}
# Create correlation plot (Spearman only)
pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8)
corrplot(cor_matrix_spearman, method = "color", type = "upper",
         order = "hclust", tl.cex = 0.8, tl.col = "black",
         addCoef.col = "black", number.cex = 0.7,
         title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures")
dev.off()
# Summary statistics (describe() -- psych masks Hmisc here given load order)
desc_stats <- describe(correlation_data)
print(round(desc_stats, 5))
# Save results to CSV files
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv")
write.csv(round(desc_stats, 5), "descriptive_statistics.csv")
# Save the set1-vs-set2 Spearman correlations and their p-values as a
# long-format table (one row per variable pair) and write it to CSV.
# Rows are accumulated in a pre-sized list and bound once with do.call(rbind);
# the original grew the data.frame with rbind() inside the double loop, which
# is quadratic in the number of rows. Output rows keep the same order
# (outer loop over set1_vars, inner loop over set2_vars).
row_list <- vector("list", length(set1_vars) * length(set2_vars))
k <- 0
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    k <- k + 1
    row_list[[k]] <- data.frame(
      Variable1 = var1,
      Variable2 = var2,
      Spearman_r = cor_matrix_spearman[var1, var2],
      P_value = cor_test_results_spearman$P[var1, var2],
      stringsAsFactors = FALSE
    )
  }
}
cor_results <- do.call(rbind, row_list)
write.csv(cor_results, "correlationCORRECT_exp2.csv", row.names = FALSE)

View File

@ -0,0 +1,266 @@
# Script to combine and recode Likert scale items in eohi2.csv
# Combines 01 and 02 versions of items, then recodes text to numeric values
# Load necessary library
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
# (the comment previously said na.strings="" which did not match the code)
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define the mapping function.
# Maps Likert response text (any letter case) to its numeric score.
#   x: character vector of responses.
# Returns a numeric vector the same length as x, scored -3 ("strongly
# disagree") .. 3 ("strongly agree"); unrecognized text, empty strings, and
# NA input all yield NA.
recode_likert <- function(x) {
  # A named-vector lookup replaces the seven-branch dplyr::case_when():
  # tolower() runs once on the whole vector instead of once per branch, and
  # the function no longer depends on dplyr at all.
  scale_map <- c(
    "strongly disagree" = -3,
    "disagree" = -2,
    "somewhat disagree" = -1,
    "neither agree nor disagree" = 0,
    "somewhat agree" = 1,
    "agree" = 2,
    "strongly agree" = 3
  )
  unname(scale_map[tolower(x)])
}
# Define source column pairs (Set A and Set B)
# NOTE: source_cols_A[i], source_cols_B[i], and target_cols[i] must stay
# positionally aligned -- index i identifies one item across all three vectors
source_cols_A <- c(
"01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5",
"01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5",
"01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5",
"01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5",
"01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5",
"01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5",
"01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5",
"01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5",
"01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5",
"01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5",
"01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5",
"01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5"
)
source_cols_B <- c(
"02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5",
"02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5",
"02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5",
"02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5",
"02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5",
"02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5",
"02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5",
"02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5",
"02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5",
"02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5",
"02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5",
"02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5"
)
# Define target column names
target_cols <- c(
"past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel",
"past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex",
"past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice",
"past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel",
"past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex",
"past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice",
"fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel",
"fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex",
"fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice",
"fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel",
"fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex",
"fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice"
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Print first 30 actual column names for debugging
cat("First 30 actual column names in CSV:\n")
for (i in 1:min(30, length(df_cols))) {
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
}
cat("\n")
# Check Source A columns
missing_A <- source_cols_A[!source_cols_A %in% df_cols]
existing_A <- source_cols_A[source_cols_A %in% df_cols]
cat("Source Set A:\n")
cat(" Expected: 60 columns\n")
cat(" Found:", length(existing_A), "columns\n")
cat(" Missing:", length(missing_A), "columns\n")
if (length(missing_A) > 0) {
cat("\n Missing columns from Set A:\n")
for (col in missing_A) {
cat(" -", col, "\n")
}
}
# Check Source B columns
missing_B <- source_cols_B[!source_cols_B %in% df_cols]
existing_B <- source_cols_B[source_cols_B %in% df_cols]
cat("\nSource Set B:\n")
cat(" Expected: 60 columns\n")
cat(" Found:", length(existing_B), "columns\n")
cat(" Missing:", length(missing_B), "columns\n")
if (length(missing_B) > 0) {
cat("\n Missing columns from Set B:\n")
for (col in missing_B) {
cat(" -", col, "\n")
}
}
# Check for columns with similar names (potential typos/spaces)
if (length(missing_A) > 0 || length(missing_B) > 0) {
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
all_missing <- c(missing_A, missing_B)
for (miss_col in all_missing) {
# Find columns that start with similar pattern
# (first 10 characters of the missing name, matched case-insensitively)
pattern <- substr(miss_col, 1, 10)
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
if (length(similar) > 0) {
cat("\nLooking for:", miss_col)
cat("\n Similar columns found:\n")
for (sim in similar) {
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
}
}
}
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# (aborts only when more than half of a 60-column source set is absent)
if (length(missing_A) > 30 || length(missing_B) > 30) {
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# Combine each Set A / Set B source pair into its recoded numeric target
# column. Per row, the Set A value wins when it is non-missing and non-empty;
# otherwise the Set B value is used (a simple coalesce), and the merged text
# is then recoded to its numeric Likert score.
col_or_na <- function(d, nm) if (nm %in% names(d)) d[[nm]] else rep(NA, nrow(d))
for (k in seq_along(target_cols)) {
  vals_a <- col_or_na(df, source_cols_A[k])
  vals_b <- col_or_na(df, source_cols_B[k])
  take_a <- !is.na(vals_a) & vals_a != ""
  df[[target_cols[k]]] <- recode_likert(ifelse(take_a, vals_a, vals_b))
  # Print progress
  cat("Processed:", target_cols[k], "\n")
}
# ============= VERIFY TARGET COLUMNS WERE CREATED =============
cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
# Get updated column names
df_cols_after <- trimws(names(df))
# Check which target columns exist
existing_targets <- target_cols[target_cols %in% df_cols_after]
missing_targets <- target_cols[!target_cols %in% df_cols_after]
cat("Target Columns:\n")
cat(" Expected: 60 columns\n")
cat(" Created:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n WARNING: The following target columns were NOT created:\n")
for (col in missing_targets) {
cat(" -", col, "\n")
}
stop("\nERROR: Not all target columns were created successfully!")
} else {
cat("\n SUCCESS: All 60 target columns created successfully!\n")
}
cat("\n=== END VERIFICATION ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows.
# For one randomly chosen row it prints each A/B source value next to the
# recoded numeric target, so the coalesce + recode can be eyeballed.
# Reads the script-level df, source_cols_A, source_cols_B, and target_cols.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# Check each of the 60 pairs
for (i in 1:60) {
col_A <- source_cols_A[i]
col_B <- source_cols_B[i]
target_col <- target_cols[i]
# Get values
val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
target_val <- df[random_row, target_col]
# Determine which source had the value (same precedence as the merge
# loop above: Set A wins when non-missing and non-empty)
has_val_A <- !is.na(val_A) && val_A != ""
has_val_B <- !is.na(val_B) && val_B != ""
if (has_val_A) {
source_used <- "A"
original_text <- val_A
} else if (has_val_B) {
source_used <- "B"
original_text <- val_B
} else {
source_used <- "NONE"
original_text <- "(empty)"
}
# Print the info
cat(sprintf("Pair %2d:\n", i))
cat(sprintf(" Source A: %-30s\n", col_A))
cat(sprintf(" Source B: %-30s\n", col_B))
cat(sprintf(" Target: %-30s\n", target_col))
cat(sprintf(" Value found in: Source %s\n", source_used))
cat(sprintf(" Original text: '%s'\n", original_text))
cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# NOTE(review): this overwrites the input file eohi2.csv in place; consider
# writing to a new filename so the raw export is preserved
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\nProcessing complete! 60 new columns added to eohi2.csv\n")

View File

@ -0,0 +1,192 @@
# Script to recode present-time Likert scale items in eohi2.csv
# Recodes prePrefItem, prePersItem, and preValItem to numeric values
# Load necessary library
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
# df holds the raw export; 15 recoded numeric columns are added further down
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define the mapping function.
# Maps Likert response text (any letter case) to its numeric score.
#   x: character vector of responses.
# Returns a numeric vector the same length as x, scored -3 ("strongly
# disagree") .. 3 ("strongly agree"); unrecognized text, empty strings, and
# NA input all yield NA.
recode_likert <- function(x) {
  # A named-vector lookup replaces the seven-branch dplyr::case_when():
  # tolower() runs once on the whole vector instead of once per branch, and
  # the function no longer depends on dplyr at all.
  scale_map <- c(
    "strongly disagree" = -3,
    "disagree" = -2,
    "somewhat disagree" = -1,
    "neither agree nor disagree" = 0,
    "somewhat agree" = 1,
    "agree" = 2,
    "strongly agree" = 3
  )
  unname(scale_map[tolower(x)])
}
# Define source columns (15 columns total)
# NOTE: source_cols[i] and target_cols[i] must stay positionally aligned --
# index i identifies one item across both vectors
source_cols <- c(
"prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5",
"prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5",
"preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5"
)
# Define target column names (15 columns total)
target_cols <- c(
"present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel",
"present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex",
"present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice"
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Print first 30 actual column names for debugging
cat("First 30 actual column names in CSV:\n")
for (i in 1:min(30, length(df_cols))) {
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
}
cat("\n")
# Check Source columns
missing_source <- source_cols[!source_cols %in% df_cols]
existing_source <- source_cols[source_cols %in% df_cols]
cat("Source Columns:\n")
cat(" Expected: 15 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")
if (length(missing_source) > 0) {
cat("\n Missing columns:\n")
for (col in missing_source) {
cat(" -", col, "\n")
}
}
# Check for columns with similar names (potential typos/spaces)
if (length(missing_source) > 0) {
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
for (miss_col in missing_source) {
# Find columns that start with similar pattern
# (first 10 characters of the missing name, matched case-insensitively)
pattern <- substr(miss_col, 1, 10)
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
if (length(similar) > 0) {
cat("\nLooking for:", miss_col)
cat("\n Similar columns found:\n")
for (sim in similar) {
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
}
}
}
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# (aborts when more than 7 of the 15 source columns are absent)
if (length(missing_source) > 7) {
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# Check if target columns exist in the dataframe
cat("\n=== CHECKING TARGET COLUMNS ===\n")
existing_targets <- target_cols[target_cols %in% df_cols]
missing_targets <- target_cols[!target_cols %in% df_cols]
cat("Target Columns:\n")
cat(" Expected: 15 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n Target columns do NOT exist yet - will create them.\n")
if (length(existing_targets) > 0) {
cat(" WARNING: Some target columns already exist and will be overwritten.\n")
}
} else {
cat(" All target columns exist - will overwrite with recoded values.\n")
}
cat("\n")
# Recode each present-time source column into its numeric target column,
# overwriting any target column that already exists under that name.
for (k in seq_along(target_cols)) {
  src <- source_cols[k]
  # Missing source columns fall back to an all-NA vector of the right length
  raw_vals <- if (src %in% names(df)) df[[src]] else rep(NA, nrow(df))
  df[[target_cols[k]]] <- recode_likert(raw_vals)
  # Print progress
  cat("Processed:", target_cols[k], "\n")
}
cat("\n=== RECODING COMPLETE ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows.
# For one randomly chosen row it prints each source value next to its recoded
# numeric target so the mapping can be eyeballed. Reads the script-level df,
# source_cols, and target_cols.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# Check each of the 15 columns
for (i in 1:15) {
source_col <- source_cols[i]
target_col <- target_cols[i]
# Get values
source_val <- if (source_col %in% names(df)) df[random_row, source_col] else ""
target_val <- df[random_row, target_col]
# Determine if source has a value (empty string counts as no value)
has_val <- !is.na(source_val) && source_val != ""
original_text <- if (has_val) source_val else "(empty)"
# Print the info
cat(sprintf("Column %2d:\n", i))
cat(sprintf(" Source: %-30s\n", source_col))
cat(sprintf(" Target: %-30s\n", target_col))
cat(sprintf(" Original text: '%s'\n", original_text))
cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# BUG FIX: the write.csv() call below had been left active even though this
# comment and the messages printed further down all state that the write is
# disabled for review -- so the script was silently overwriting eohi2.csv in
# place. Disabled to match the stated review-mode intent.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment line 189 to save changes.\n")
cat("\nProcessing complete! 15 new columns created (not yet saved to file).\n")

View File

@ -0,0 +1,253 @@
# Script to combine DGEN variables in eohi2.csv
# Combines 01 and 02 versions of DGEN items (no recoding, just copying values)
# Load necessary library
# NOTE(review): only base R functions appear to be used in this script —
# confirm the dplyr dependency is actually needed.
library(dplyr)
# NOTE(review): hard-coded absolute path; script is tied to this machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define source column pairs (Set A and Set B)
# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header
# The three vectors below are INDEX-ALIGNED: element i of source_cols_A,
# source_cols_B and target_cols together describe one combined variable.
# Do not reorder one without reordering all three.
source_cols_A <- c(
"01past5PrefDGEN_1",
"01past5PersDGEN_1",
"01past5ValDGEN_1",
"01past10PrefDGEN_1",
"01past10PersDGEN_1",
"01past10ValDGEN_1",
"01fut5PrefDGEN_8",
"01fut5PersDGEN_8",
"01fut5ValuesDGEN_1",
"01fut10PrefDGEN_8",
"01fut10PersDGEN_8",
"01fut10ValuesDGEN_1"
)
# NOTE(review): Set B uses "Val" where Set A's fut items use "Values"
# (e.g. 02fut5ValDGEN_1 vs 01fut5ValuesDGEN_1) — per the comment above this
# reflects the actual CSV header; verify against the file if columns go missing.
source_cols_B <- c(
"02past5PrefDGEN_1",
"02past5PersDGEN_1",
"02past5ValDGEN_1",
"02past10PrefDGEN_1",
"02past10PersDGEN_1",
"02past10ValDGEN_1",
"02fut5PrefDGEN_8",
"02fut5PersDGEN_8",
"02fut5ValDGEN_1",
"02fut10PrefDGEN_8",
"02fut10PersDGEN_8",
"02fut10ValDGEN_1"
)
# Define target column names
target_cols <- c(
"DGEN_past_5_Pref",
"DGEN_past_5_Pers",
"DGEN_past_5_Val",
"DGEN_past_10_Pref",
"DGEN_past_10_Pers",
"DGEN_past_10_Val",
"DGEN_fut_5_Pref",
"DGEN_fut_5_Pers",
"DGEN_fut_5_Val",
"DGEN_fut_10_Pref",
"DGEN_fut_10_Pers",
"DGEN_fut_10_Val"
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Diagnostic report: verifies the expected source columns exist in the CSV,
# lists anything missing, and tries to suggest near-miss column names.
# Aborts (stop) only if more than half of a set is missing.
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
# Note: df itself keeps its original (untrimmed) names; df_cols is only
# used for the membership checks below.
df_cols <- trimws(names(df))
# Print first 30 actual column names for debugging
cat("First 30 actual column names in CSV:\n")
for (i in 1:min(30, length(df_cols))) {
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
}
cat("\n")
# Check Source A columns
missing_A <- source_cols_A[!source_cols_A %in% df_cols]
existing_A <- source_cols_A[source_cols_A %in% df_cols]
cat("Source Set A:\n")
cat(" Expected: 12 columns\n")
cat(" Found:", length(existing_A), "columns\n")
cat(" Missing:", length(missing_A), "columns\n")
if (length(missing_A) > 0) {
cat("\n Missing columns from Set A:\n")
for (col in missing_A) {
cat(" -", col, "\n")
}
}
# Check Source B columns
missing_B <- source_cols_B[!source_cols_B %in% df_cols]
existing_B <- source_cols_B[source_cols_B %in% df_cols]
cat("\nSource Set B:\n")
cat(" Expected: 12 columns\n")
cat(" Found:", length(existing_B), "columns\n")
cat(" Missing:", length(missing_B), "columns\n")
if (length(missing_B) > 0) {
cat("\n Missing columns from Set B:\n")
for (col in missing_B) {
cat(" -", col, "\n")
}
}
# Check for columns with similar names (potential typos/spaces)
if (length(missing_A) > 0 || length(missing_B) > 0) {
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
all_missing <- c(missing_A, missing_B)
for (miss_col in all_missing) {
# Find columns that start with similar pattern
# NOTE(review): the 10-char prefix is used as an (unescaped) regex via
# grep(); safe for these alphanumeric/underscore names, but would misfire
# if a column name ever contained regex metacharacters.
pattern <- substr(miss_col, 1, 10)
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
if (length(similar) > 0) {
cat("\nLooking for:", miss_col)
cat("\n Similar columns found:\n")
for (sim in similar) {
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
}
}
}
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# Threshold of 6 = half of each 12-column set.
if (length(missing_A) > 6 || length(missing_B) > 6) {
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# Build each combined DGEN column: take the Set A (01*) response when it is
# non-missing and non-empty, otherwise fall back to the Set B (02*) response.
# Values are copied verbatim — no recoding is applied.
for (idx in seq_len(12)) {
  a_col <- source_cols_A[idx]
  b_col <- source_cols_B[idx]
  out_col <- target_cols[idx]
  # A source column absent from the CSV behaves like an all-NA column of the
  # right length, so the fallback logic still lines up row-by-row.
  a_vals <- if (a_col %in% names(df)) df[[a_col]] else rep(NA, nrow(df))
  b_vals <- if (b_col %in% names(df)) df[[b_col]] else rep(NA, nrow(df))
  # Element-wise coalesce: prefer A wherever A holds an actual response.
  prefer_a <- !is.na(a_vals) & a_vals != ""
  df[[out_col]] <- ifelse(prefer_a, a_vals, b_vals)
  # Progress report
  cat("Processed:", out_col, "\n")
}
# ============= VERIFY TARGET COLUMNS WERE CREATED =============
# Post-condition check: all 12 target columns must now exist in df,
# otherwise the script aborts before anything is written to disk.
cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
# Get updated column names
df_cols_after <- trimws(names(df))
# Check which target columns exist
existing_targets <- target_cols[target_cols %in% df_cols_after]
missing_targets <- target_cols[!target_cols %in% df_cols_after]
cat("Target Columns:\n")
cat(" Expected: 12 columns\n")
cat(" Created:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n WARNING: The following target columns were NOT created:\n")
for (col in missing_targets) {
cat(" -", col, "\n")
}
# Hard stop: a partial result must not reach the write.csv further below.
stop("\nERROR: Not all target columns were created successfully!")
} else {
cat("\n SUCCESS: All 12 target columns created successfully!\n")
}
cat("\n=== END VERIFICATION ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows
#
# Spot-check: for one randomly chosen row, prints each of the 12 A/B/target
# triples, reports which source (A or B) supplied the value, and shows the
# original vs. target value so the coalesce logic can be verified by eye.
# Relies on script-level globals: df, source_cols_A, source_cols_B, target_cols.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# Check each of the 12 pairs
for (i in 1:12) {
col_A <- source_cols_A[i]
col_B <- source_cols_B[i]
target_col <- target_cols[i]
# Get values
# A missing source column is treated as an empty response.
val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
target_val <- df[random_row, target_col]
# Determine which source had the value
# Mirrors the A-first preference used when the target columns were built.
has_val_A <- !is.na(val_A) && val_A != ""
has_val_B <- !is.na(val_B) && val_B != ""
if (has_val_A) {
source_used <- "A"
original_value <- val_A
} else if (has_val_B) {
source_used <- "B"
original_value <- val_B
} else {
source_used <- "NONE"
original_value <- "(empty)"
}
# Print the info
cat(sprintf("Pair %2d:\n", i))
cat(sprintf(" Source A: %-30s\n", col_A))
cat(sprintf(" Source B: %-30s\n", col_B))
cat(sprintf(" Target: %-30s\n", target_col))
cat(sprintf(" Value found in: Source %s\n", source_used))
cat(sprintf(" Original value: '%s'\n", original_value))
cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# NOTE: unlike the sibling scripts, this one intentionally writes the result
# immediately — the write is active and the completion message below agrees.
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\nProcessing complete! 12 new columns added to eohi2.csv\n")

View File

@ -0,0 +1,183 @@
# Script to calculate DGEN means by time period in eohi2.csv
# Averages the 3 domain scores (Pref, Pers, Val) for each time period
# Load necessary library
# NOTE(review): only base R functions appear to be used below — confirm
# the dplyr dependency is needed.
library(dplyr)
# NOTE(review): hard-coded absolute path; script is tied to this machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define source columns (12 total)
# These are the combined DGEN columns produced by the upstream combine script.
source_cols <- c(
"DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
"DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
"DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
"DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"
)
# Define target columns (4 total)
target_cols <- c(
"DGEN_past_5_mean",
"DGEN_past_10_mean",
"DGEN_fut_5_mean",
"DGEN_fut_10_mean"
)
# Define groupings: each target gets 3 source columns
# Keys must match target_cols exactly — the calculation loop indexes this
# list by target name.
source_groups <- list(
DGEN_past_5_mean = c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val"),
DGEN_past_10_mean = c("DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val"),
DGEN_fut_5_mean = c("DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val"),
DGEN_fut_10_mean = c("DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Diagnostic report: confirms the 12 source columns exist and whether the
# 4 target columns already exist (they will be overwritten if so).
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Check Source columns
missing_source <- source_cols[!source_cols %in% df_cols]
existing_source <- source_cols[source_cols %in% df_cols]
cat("Source Columns:\n")
cat(" Expected: 12 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")
if (length(missing_source) > 0) {
cat("\n Missing source columns:\n")
for (col in missing_source) {
cat(" -", col, "\n")
}
}
# Check Target columns
missing_targets <- target_cols[!target_cols %in% df_cols]
existing_targets <- target_cols[target_cols %in% df_cols]
cat("\nTarget Columns:\n")
cat(" Expected: 4 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n Target columns do NOT exist yet - will create them.\n")
if (length(existing_targets) > 0) {
cat(" WARNING: Some target columns already exist and will be overwritten.\n")
}
} else {
cat(" All target columns exist - will overwrite with calculated values.\n")
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# Threshold of 6 = half of the 12 expected source columns.
if (length(missing_source) > 6) {
stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= CALCULATE MEANS =============
# Each DGEN time-period mean is the row-wise average of its three domain
# scores (Pref, Pers, Val); missing items are dropped from the average.
cat("Calculating DGEN means by time period...\n")
# Coerce every source column that actually exists to numeric before averaging.
for (numeric_col in intersect(source_cols, names(df))) {
  df[[numeric_col]] <- as.numeric(df[[numeric_col]])
}
# Compute each target from whichever of its three domain columns are present.
for (target_name in target_cols) {
  group_cols <- intersect(source_groups[[target_name]], names(df))
  if (length(group_cols) == 0) {
    cat(" WARNING: No source columns found for", target_name, "\n")
  } else {
    # na.rm = TRUE: average over however many domain scores were answered.
    df[[target_name]] <- rowMeans(df[, group_cols, drop = FALSE], na.rm = TRUE)
    cat(" Processed:", target_name, "\n")
  }
}
cat("\n=== CALCULATION COMPLETE ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows
#
# Spot-check: recomputes each of the 4 DGEN means by hand for one random row
# and compares against the stored target value (tolerance 1e-4).
# Relies on script-level globals: df, target_cols, source_groups.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# Check each of the 4 target columns
for (target in target_cols) {
source_group <- source_groups[[target]]
cat(sprintf("Target: %s\n", target))
cat(" Source columns:\n")
# Get values from source columns
# Source columns were converted to numeric earlier, so these are scalars.
values <- numeric(3)
for (i in 1:3) {
col <- source_group[i]
val <- if (col %in% names(df)) df[random_row, col] else NA
values[i] <- val
cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
}
# Calculate expected mean
# Mirrors rowMeans(..., na.rm = TRUE): NA items are excluded.
valid_values <- values[!is.na(values)]
if (length(valid_values) > 0) {
expected_mean <- mean(valid_values)
actual_value <- df[random_row, target]
cat(sprintf("\n Calculation:\n"))
cat(sprintf(" Sum: %s = %.5f\n", paste(valid_values, collapse = " + "), sum(valid_values)))
cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean))
cat(sprintf(" Target value: %.5f\n", actual_value))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
} else {
cat(" No valid values to calculate mean.\n")
}
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# Here the write really is disabled, consistent with the messages below.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment line 163 to save changes.\n")
cat("\nProcessing complete! 4 DGEN mean columns calculated (not yet saved to file).\n")

View File

@ -0,0 +1,298 @@
# Script to compute AOT and CRT scales in eohi2.csv
# AOT: Reverse codes items 4-7, then averages all 8 items
# CRT: Calculates proportion of correct and intuitive responses
# Load necessary library
# NOTE(review): only base R functions appear to be used below — confirm
# the dplyr dependency is needed.
library(dplyr)
# NOTE(review): hard-coded absolute path; script is tied to this machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define source columns
aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8")
crt_cols <- c("crt_1", "crt_2", "crt_3")
# Define target columns
target_cols <- c("aot_total", "crt_correct", "crt_int")
# Define correct and intuitive CRT answers
# Element j is the answer key for crt_cols[j]; responses are compared after
# trimws() + tolower(), so matching is case-insensitive but otherwise exact.
crt_correct_answers <- c("5 cents", "5 minutes", "47 days")
crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days")
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Diagnostic report: confirms the AOT (8), CRT (3) and target (3) columns
# exist; aborts only past per-set thresholds (see stop() below).
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Check AOT columns
missing_aot <- aot_cols[!aot_cols %in% df_cols]
existing_aot <- aot_cols[aot_cols %in% df_cols]
cat("AOT Source Columns:\n")
cat(" Expected: 8 columns\n")
cat(" Found:", length(existing_aot), "columns\n")
cat(" Missing:", length(missing_aot), "columns\n")
if (length(missing_aot) > 0) {
cat("\n Missing AOT columns:\n")
for (col in missing_aot) {
cat(" -", col, "\n")
}
}
# Check CRT columns
missing_crt <- crt_cols[!crt_cols %in% df_cols]
existing_crt <- crt_cols[crt_cols %in% df_cols]
cat("\nCRT Source Columns:\n")
cat(" Expected: 3 columns\n")
cat(" Found:", length(existing_crt), "columns\n")
cat(" Missing:", length(missing_crt), "columns\n")
if (length(missing_crt) > 0) {
cat("\n Missing CRT columns:\n")
for (col in missing_crt) {
cat(" -", col, "\n")
}
}
# Check target columns
# Targets are created by this script, so "missing" here is expected on a
# first run; they are only reported, not required.
missing_targets <- target_cols[!target_cols %in% df_cols]
existing_targets <- target_cols[target_cols %in% df_cols]
cat("\nTarget Columns:\n")
cat(" Expected: 3 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n Missing target columns:\n")
for (col in missing_targets) {
cat(" -", col, "\n")
}
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) {
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= PROCESS AOT SCALE =============
cat("Processing AOT scale...\n")
# Convert AOT columns to numeric (non-numeric entries become NA with a warning)
for (col in aot_cols) {
  if (col %in% names(df)) {
    df[[col]] <- as.numeric(df[[col]])
  }
}
# Calculate the AOT total as the mean of all answered items, with items 4-7
# reverse coded (multiplied by -1) for the calculation only. The original
# item values in df are left untouched.
#
# BUG FIX: the previous version reversed fixed POSITIONS 4:7 of whatever
# subset of AOT columns happened to exist, so if any of aot_1..aot_3 were
# missing (permitted by the check above, which allows up to 4 missing) the
# wrong items were reversed. The reverse-coded items are now selected by
# column NAME, which is correct regardless of which columns are present.
existing_aot_cols <- aot_cols[aot_cols %in% names(df)]
reverse_cols <- intersect(paste0("aot_", 4:7), existing_aot_cols)
df$aot_total <- apply(df[, existing_aot_cols, drop = FALSE], 1, function(row) {
  # Row arrives as a numeric vector (all AOT columns were coerced above).
  values <- as.numeric(row)
  names(values) <- existing_aot_cols
  values[reverse_cols] <- values[reverse_cols] * -1
  # na.rm = TRUE: average over however many items were answered.
  mean(values, na.rm = TRUE)
})
cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n")
cat(" Original AOT item values preserved in dataframe.\n\n")
# ============= PROCESS CRT SCALES =============
# For each respondent: the proportion of answered CRT items matching the
# correct answer key, and the proportion matching the intuitive (lure) key.
# Rows with no usable responses keep NA in both columns.
cat("Processing CRT scales...\n")
# Initialize CRT columns
df$crt_correct <- NA
df$crt_int <- NA
# Score each respondent in a single pass over the three items.
for (row_idx in seq_len(nrow(df))) {
  n_answered <- 0
  n_correct <- 0
  n_intuitive <- 0
  for (item in 1:3) {
    item_col <- crt_cols[item]
    if (!(item_col %in% names(df))) next
    # Normalize the free-text response before comparing to the answer keys.
    answer <- trimws(tolower(as.character(df[row_idx, item_col])))
    if (is.na(answer) || answer == "") next
    n_answered <- n_answered + 1
    if (answer == tolower(crt_correct_answers[item])) {
      n_correct <- n_correct + 1
    }
    if (answer == tolower(crt_intuitive_answers[item])) {
      n_intuitive <- n_intuitive + 1
    }
  }
  # Both proportions share the same denominator: the count of non-empty
  # responses (identical to the two separate counters in the original code).
  if (n_answered > 0) {
    df$crt_correct[row_idx] <- n_correct / n_answered
    df$crt_int[row_idx] <- n_intuitive / n_answered
  }
}
cat(" CRT correct and intuitive scores calculated.\n\n")
cat("=== PROCESSING COMPLETE ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows
#
# Spot-check: recomputes aot_total, crt_correct and crt_int by hand for one
# random row and compares against the stored values (tolerance 1e-4).
# Relies on script-level globals: df, aot_cols, crt_cols,
# crt_correct_answers, crt_intuitive_answers.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# AOT Check
cat("--- AOT SCALE ---\n")
cat("Source values (original in CSV):\n")
# NOTE(review): aot_original is filled but never read afterwards — only
# aot_for_calc feeds the recomputation below.
aot_original <- numeric(8)
aot_for_calc <- numeric(8)
for (i in 1:8) {
col <- aot_cols[i]
val <- if (col %in% names(df)) df[random_row, col] else NA
aot_original[i] <- val
# Apply reversal for items 4-7
if (i %in% 4:7) {
aot_for_calc[i] <- val * -1
cat(sprintf(" %s: %s (reversed to %s for calculation)\n",
col,
ifelse(is.na(val), "NA", as.character(val)),
ifelse(is.na(val), "NA", as.character(val * -1))))
} else {
aot_for_calc[i] <- val
cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
}
}
# Manual calculation check
# Mirrors the production calculation: mean over non-NA reversed values.
valid_aot <- aot_for_calc[!is.na(aot_for_calc)]
if (length(valid_aot) > 0) {
expected_mean <- mean(valid_aot)
actual_value <- df$aot_total[random_row]
cat(sprintf("\nCalculation check:\n"))
cat(sprintf(" Sum of reversed values: %s\n", paste(valid_aot, collapse = " + ")))
cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean))
cat(sprintf(" Target value (aot_total): %.5f\n", actual_value))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
} else {
cat("\n No valid AOT values to calculate.\n")
}
# CRT Check
cat("\n--- CRT SCALE ---\n")
cat("Source values:\n")
crt_correct_count <- 0
crt_int_count <- 0
crt_n <- 0
for (i in 1:3) {
col <- crt_cols[i]
val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
# Same normalization as the production scoring: trim + lowercase.
val_trimmed <- trimws(tolower(val))
correct_ans <- crt_correct_answers[i]
intuitive_ans <- crt_intuitive_answers[i]
is_correct <- val_trimmed == tolower(correct_ans)
is_intuitive <- val_trimmed == tolower(intuitive_ans)
if (val_trimmed != "" && !is.na(val_trimmed)) {
crt_n <- crt_n + 1
if (is_correct) crt_correct_count <- crt_correct_count + 1
if (is_intuitive) crt_int_count <- crt_int_count + 1
}
cat(sprintf(" %s: '%s'\n", col, val))
cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct")))
cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive")))
}
cat("\nCalculation check:\n")
if (crt_n > 0) {
expected_correct <- crt_correct_count / crt_n
expected_int <- crt_int_count / crt_n
actual_correct <- df$crt_correct[random_row]
actual_int <- df$crt_int[random_row]
cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct))
cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗")))
cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int))
cat(sprintf(" Target value (crt_int): %.5f\n", actual_int))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗")))
} else {
cat(" No valid CRT responses to calculate.\n")
}
cat("\n========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# BUG FIX: write.csv() was left active even though the comment above and the
# messages printed below both state the write is commented out — the CSV was
# being silently overwritten during review. The call is now actually disabled
# so the code matches its stated intent.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment line 253 to save changes.\n")
cat("\nProcessing complete! AOT and CRT scales calculated (not yet saved to file).\n")

View File

@ -0,0 +1,292 @@
# Script to calculate absolute differences between time intervals in eohi2.csv
# Compares present vs past/future, and 5-year vs 10-year intervals
# Load necessary library
# NOTE(review): only base R functions appear to be used below — confirm
# the dplyr dependency is needed.
library(dplyr)
# NOTE(review): hard-coded absolute path; script is tied to this machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define the 15 item names (same order for all time periods)
items <- c(
"pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
"pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
"val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)
# Note: present uses lowercase "tv", others use uppercase "TV"
# items_present must stay index-aligned with items — element i refers to the
# same survey item in both vectors.
items_present <- c(
"pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
"pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
"val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)
# Define all source columns (75 total)
# 5 time periods x 15 items.
source_cols <- c(
paste0("present_", items_present),
paste0("past_5_", items),
paste0("past_10_", items),
paste0("fut_5_", items),
paste0("fut_10_", items)
)
# Define all target columns (90 total = 6 calculation types × 15 items)
target_NPast_5 <- paste0("NPast_5_", items)
target_NPast_10 <- paste0("NPast_10_", items)
target_NFut_5 <- paste0("NFut_5_", items)
target_NFut_10 <- paste0("NFut_10_", items)
target_5_10past <- paste0("5.10past_", items)
target_5_10fut <- paste0("5.10fut_", items)
target_cols <- c(
target_NPast_5,
target_NPast_10,
target_NFut_5,
target_NFut_10,
target_5_10past,
target_5_10fut
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Diagnostic report: confirms the 75 source columns exist and whether the
# 90 target columns already exist (they will be overwritten if so).
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Check Source columns
missing_source <- source_cols[!source_cols %in% df_cols]
existing_source <- source_cols[source_cols %in% df_cols]
cat("Source Columns:\n")
cat(" Expected: 75 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")
# Only list individual missing names when the list is short enough to read.
if (length(missing_source) > 0 && length(missing_source) <= 10) {
cat("\n Missing source columns:\n")
for (col in missing_source) {
cat(" -", col, "\n")
}
} else if (length(missing_source) > 10) {
cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
}
# Check Target columns
missing_targets <- target_cols[!target_cols %in% df_cols]
existing_targets <- target_cols[target_cols %in% df_cols]
cat("\nTarget Columns:\n")
cat(" Expected: 90 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n Target columns do NOT exist yet - will create them.\n")
if (length(existing_targets) > 0) {
cat(" WARNING: Some target columns already exist and will be overwritten.\n")
}
} else {
cat(" All target columns exist - will overwrite with calculated values.\n")
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# Threshold of 30 missing out of 75 expected source columns.
if (length(missing_source) > 30) {
stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= CALCULATE DIFFERENCES =============
# Builds all 90 absolute-difference columns: for each of the 15 items,
# |present - past_5|, |present - past_10|, |present - fut_5|,
# |present - fut_10|, |past_5 - past_10| and |fut_5 - fut_10|.
cat("Calculating time interval differences...\n")
# Convert source columns to numeric (non-numeric entries become NA)
for (col in source_cols) {
  if (col %in% names(df)) {
    df[[col]] <- as.numeric(df[[col]])
  }
}
# Helper: element-wise absolute difference between two columns of df.
# IMPROVEMENT: a missing column now yields a full-length NA vector instead of
# a scalar NA (the previous scalar relied on recycling to reach the same
# result); the return value is always one number per row.
calc_abs_diff <- function(col1, col2) {
  val1 <- if (col1 %in% names(df)) df[[col1]] else rep(NA_real_, nrow(df))
  val2 <- if (col2 %in% names(df)) df[[col2]] else rep(NA_real_, nrow(df))
  abs(val1 - val2)
}
# CONSOLIDATION: the six copy-pasted loops are now driven by one spec table.
# Each spec names the 15 target columns, the two source prefixes with their
# item vectors (present_* uses items_present for the lowercase "tv" item),
# and the exact progress message the original printed.
diff_specs <- list(
  list(targets = target_NPast_5,
       prefix1 = "present_", items1 = items_present,
       prefix2 = "past_5_", items2 = items,
       msg = " Calculating NPast_5 differences (present vs past 5 years)...\n"),
  list(targets = target_NPast_10,
       prefix1 = "present_", items1 = items_present,
       prefix2 = "past_10_", items2 = items,
       msg = " Calculating NPast_10 differences (present vs past 10 years)...\n"),
  list(targets = target_NFut_5,
       prefix1 = "present_", items1 = items_present,
       prefix2 = "fut_5_", items2 = items,
       msg = " Calculating NFut_5 differences (present vs future 5 years)...\n"),
  list(targets = target_NFut_10,
       prefix1 = "present_", items1 = items_present,
       prefix2 = "fut_10_", items2 = items,
       msg = " Calculating NFut_10 differences (present vs future 10 years)...\n"),
  list(targets = target_5_10past,
       prefix1 = "past_5_", items1 = items,
       prefix2 = "past_10_", items2 = items,
       msg = " Calculating 5.10past differences (past 5 vs past 10 years)...\n"),
  list(targets = target_5_10fut,
       prefix1 = "fut_5_", items1 = items,
       prefix2 = "fut_10_", items2 = items,
       msg = " Calculating 5.10fut differences (future 5 vs future 10 years)...\n")
)
for (spec in diff_specs) {
  cat(spec$msg)
  for (i in 1:15) {
    df[[spec$targets[i]]] <- calc_abs_diff(
      paste0(spec$prefix1, spec$items1[i]),
      paste0(spec$prefix2, spec$items2[i])
    )
  }
}
cat("\n=== CALCULATION COMPLETE ===\n")
cat(" 90 difference columns created.\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK =============
# This function can be run multiple times to check different random rows and items
#
# Spot-check: for one row and one of the 15 items (both random unless
# specified), recomputes all six absolute differences by hand and compares
# against the stored target columns (tolerance 1e-4).
# Relies on script-level globals: df, items, items_present and the six
# target_* vectors.
#
# row_num  - optional 1-based row index; NULL picks a random row.
# item_num - optional item index 1..15; NULL picks a random item.
qa_check_random_row <- function(row_num = NULL, item_num = NULL) {
# Pick a random row or use specified row
if (is.null(row_num)) {
random_row <- sample(seq_len(nrow(df)), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
} else {
if (row_num < 1 || row_num > nrow(df)) {
cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
return()
}
random_row <- row_num
cat("\n========================================\n")
cat("QA CHECK: Specified Row #", random_row, "\n")
}
# Pick a random item or use specified item
if (is.null(item_num)) {
test_item_idx <- sample(1:15, 1)
cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n")
} else {
if (item_num < 1 || item_num > 15) {
cat("ERROR: Item number must be between 1 and 15\n")
return()
}
test_item_idx <- item_num
cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n")
}
cat("========================================\n\n")
# One entry per difference type; mirrors the production calculation specs.
calculations <- list(
list(name = "NPast_5", target = target_NPast_5[test_item_idx],
source1 = paste0("present_", items_present[test_item_idx]),
source2 = paste0("past_5_", items[test_item_idx]),
desc = "|present - past_5|"),
list(name = "NPast_10", target = target_NPast_10[test_item_idx],
source1 = paste0("present_", items_present[test_item_idx]),
source2 = paste0("past_10_", items[test_item_idx]),
desc = "|present - past_10|"),
list(name = "NFut_5", target = target_NFut_5[test_item_idx],
source1 = paste0("present_", items_present[test_item_idx]),
source2 = paste0("fut_5_", items[test_item_idx]),
desc = "|present - fut_5|"),
list(name = "NFut_10", target = target_NFut_10[test_item_idx],
source1 = paste0("present_", items_present[test_item_idx]),
source2 = paste0("fut_10_", items[test_item_idx]),
desc = "|present - fut_10|"),
list(name = "5.10past", target = target_5_10past[test_item_idx],
source1 = paste0("past_5_", items[test_item_idx]),
source2 = paste0("past_10_", items[test_item_idx]),
desc = "|past_5 - past_10|"),
list(name = "5.10fut", target = target_5_10fut[test_item_idx],
source1 = paste0("fut_5_", items[test_item_idx]),
source2 = paste0("fut_10_", items[test_item_idx]),
desc = "|fut_5 - fut_10|")
)
for (calc in calculations) {
cat(sprintf("--- %s ---\n", calc$name))
cat(sprintf("Formula: %s\n", calc$desc))
val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA
val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA
target_val <- df[random_row, calc$target]
cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1))))
cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2))))
if (!is.na(val1) && !is.na(val2)) {
expected_diff <- abs(val1 - val2)
cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff))
cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗")))
} else {
cat(" Cannot calculate (missing values)\n")
}
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on random row and random item
cat("\n\n")
qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below)
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER ROW/ITEM ***\n")
cat("For random row AND random item, run:\n")
cat(" qa_check_random_row()\n")
cat("\nFor specific row (e.g., row 118) with random item:\n")
cat(" qa_check_random_row(118)\n")
cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n")
cat(" qa_check_random_row(item_num = 5)\n")
cat("\nFor specific row AND specific item:\n")
cat(" qa_check_random_row(118, 5)\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# Here the write really is disabled, consistent with the messages below.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment line 243 to save changes.\n")
cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n")

View File

@ -0,0 +1,265 @@
# Script to calculate domain means for time interval differences in eohi2.csv
# Averages the 5 items within each domain (pref, pers, val) for each time interval type

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data (check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of NA)
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# The 15 item names (same order for all time periods)
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# Domain groupings: positions of each domain's items within `items`
pref_indices <- 1:5
pers_indices <- 6:10
val_indices <- 11:15

# Time interval prefixes and domain names
time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut")
domain_names <- c("pref", "pers", "val")

# All 90 source columns: every (time prefix, item) pair, grouped by prefix
source_cols <- unlist(lapply(time_prefixes, function(prefix) paste0(prefix, "_", items)))

# All 18 target columns (6 time intervals x 3 domains), grouped by prefix
target_cols <- unlist(lapply(time_prefixes, function(prefix) {
  paste0(prefix, "_", domain_names, "_MEAN")
}))
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Reports how many expected source/target columns are present in df before
# any calculation, so missing or misnamed CSV headers are caught early.
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
# NOTE(review): only df's names are trimmed; source_cols/target_cols are
# compared untrimmed — fine as long as those vectors contain no stray spaces.
df_cols <- trimws(names(df))
# Check Source columns
missing_source <- source_cols[!source_cols %in% df_cols]
existing_source <- source_cols[source_cols %in% df_cols]
cat("Source Columns:\n")
cat(" Expected: 90 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")
# List individual missing columns only when the list is short enough to read
if (length(missing_source) > 0 && length(missing_source) <= 20) {
  cat("\n Missing source columns:\n")
  for (col in missing_source) {
    cat(" -", col, "\n")
  }
} else if (length(missing_source) > 20) {
  cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
}
# Check Target columns
missing_targets <- target_cols[!target_cols %in% df_cols]
existing_targets <- target_cols[target_cols %in% df_cols]
cat("\nTarget Columns:\n")
cat(" Expected: 18 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
  cat("\n Target columns do NOT exist yet - will create them.\n")
  if (length(existing_targets) > 0) {
    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
  }
} else {
  cat(" All target columns exist - will overwrite with calculated values.\n")
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# (more than half of the 90 source columns absent is treated as fatal)
if (length(missing_source) > 45) {
  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= CALCULATE DOMAIN MEANS =============
cat("Calculating domain means for time interval differences...\n")
# Coerce every source column that is actually present in df to numeric
present_sources <- intersect(source_cols, names(df))
if (length(present_sources) > 0) {
  df[present_sources] <- lapply(df[present_sources], as.numeric)
}
# Calculate means for each time interval × domain combination
# REFACTOR: one loop over the three domains replaces the three copy-pasted
# per-domain branches; the created columns and console output are unchanged.
domain_index_sets <- list(pref = pref_indices, pers = pers_indices, val = val_indices)
for (time_prefix in time_prefixes) {
  for (domain in names(domain_index_sets)) {
    # The five source item columns for this (interval, domain) pair
    src_cols <- paste0(time_prefix, "_", items[domain_index_sets[[domain]]])
    existing_cols <- src_cols[src_cols %in% names(df)]
    # Skip silently when no source columns exist (matches original behavior)
    if (length(existing_cols) > 0) {
      target <- paste0(time_prefix, "_", domain, "_MEAN")
      # Row-wise mean, ignoring NAs (all-NA rows yield NaN)
      df[[target]] <- rowMeans(df[, existing_cols, drop = FALSE], na.rm = TRUE)
      cat(" Processed:", target, "\n")
    }
  }
}
cat("\n=== CALCULATION COMPLETE ===\n")
cat(" 18 domain mean columns created.\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK =============
# This function can be run multiple times to check different random rows and time intervals
#
# Recomputes each domain's *_MEAN for one row from its five source items and
# compares it to the stored value, printing a YES/NO match per domain.
#
# Arguments:
#   row_num           - 1-based row to check; NULL picks a random row.
#   time_interval_num - 1..6 index into time_prefixes; NULL picks at random.
#
# Relies on globals defined earlier in this script: df, time_prefixes,
# domain_names, items, pref_indices, pers_indices, val_indices.
qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) {
  # Pick a random row or use specified row
  if (is.null(row_num)) {
    random_row <- sample(seq_len(nrow(df)), 1)
    cat("\n========================================\n")
    cat("QA CHECK: Random Row #", random_row, "\n")
  } else {
    # Reject out-of-range row numbers instead of erroring on the subset below
    if (row_num < 1 || row_num > nrow(df)) {
      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
      return()
    }
    random_row <- row_num
    cat("\n========================================\n")
    cat("QA CHECK: Specified Row #", random_row, "\n")
  }
  # Pick a random time interval or use specified interval
  if (is.null(time_interval_num)) {
    test_interval_idx <- sample(1:6, 1)
    cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n")
  } else {
    if (time_interval_num < 1 || time_interval_num > 6) {
      cat("ERROR: Time interval number must be between 1 and 6\n")
      cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n")
      return()
    }
    test_interval_idx <- time_interval_num
    cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n")
  }
  cat("========================================\n\n")
  time_prefix <- time_prefixes[test_interval_idx]
  # Check each of the 3 domains
  for (domain_idx in 1:3) {
    domain_name <- domain_names[domain_idx]
    # Get the appropriate item indices
    if (domain_idx == 1) {
      item_indices <- pref_indices
      domain_label <- "Preferences"
    } else if (domain_idx == 2) {
      item_indices <- pers_indices
      domain_label <- "Personality"
    } else {
      item_indices <- val_indices
      domain_label <- "Values"
    }
    cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label))
    # Get source column names
    source_cols_domain <- paste0(time_prefix, "_", items[item_indices])
    target_col <- paste0(time_prefix, "_", domain_name, "_MEAN")
    # Get values
    values <- numeric(5)
    cat("Source values:\n")
    for (i in 1:5) {
      col <- source_cols_domain[i]
      # Missing columns are treated as NA so the check still proceeds
      val <- if (col %in% names(df)) df[random_row, col] else NA
      values[i] <- val
      cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val))))
    }
    # Calculate expected mean
    # NAs dropped first to mirror rowMeans(..., na.rm = TRUE) in the main loop
    valid_values <- values[!is.na(values)]
    if (length(valid_values) > 0) {
      expected_mean <- mean(valid_values)
      actual_value <- df[random_row, target_col]
      cat(sprintf("\nCalculation:\n"))
      cat(sprintf(" Sum: %s = %.5f\n",
        paste(sprintf("%.5f", valid_values), collapse = " + "),
        sum(valid_values)))
      cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean))
      cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value))
      # Tolerance comparison avoids floating-point equality false alarms
      cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
    } else {
      cat(" No valid values to calculate mean.\n")
    }
    cat("\n")
  }
  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
# Run QA check on random row and random time interval
cat("\n\n")
qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below)
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n")
cat("For random row AND random time interval, run:\n")
cat(" qa_check_random_row()\n")
cat("\nFor specific row (e.g., row 118) with random interval:\n")
cat(" qa_check_random_row(118)\n")
cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n")
cat(" qa_check_random_row(time_interval_num = 3)\n")
cat("\nFor specific row AND specific interval:\n")
cat(" qa_check_random_row(118, 3)\n")
cat("\n")
cat("Time Interval Numbers:\n")
cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n")
cat(" 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# BUG FIX: this write.csv was left live even though the messages printed
# below state the write is commented out; it is now commented out to match
# the stated review-first workflow (and the parallel item-difference script).
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment the write.csv line above to save changes.\n")
cat("\nProcessing complete! 18 domain mean columns calculated (not yet saved to file).\n")

View File

@ -0,0 +1,95 @@
# Script 08: Create 5_10 DGEN Variables
# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings
# for both Past and Future directions
# VARIABLES CREATED: 6 total (3 domains × 2 time directions)
library(tidyverse)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data
data <- read.csv("eohi2.csv")
# BUG FIX: collapse was " x" (no trailing space), producing e.g. "178 x120"
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$ResponseId))))

# Verify source columns exist before computing anything
source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
                 "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
                 "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
                 "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")
missing_vars <- source_vars[!source_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", ")))
}
print("All source DGEN variables found!")

# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year)
# Formula: |DGEN_5 - DGEN_10|
# NOTE: Using X prefix because R adds it to column names starting with numbers
# PAST direction
data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref)
data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers)
data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val)
# FUTURE direction
data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref)
data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers)
data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val)

# Verify variables were created
target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
                 "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")
# BUG FIX: print() does not interpret "\n" — it printed a literal backslash-n.
# Headers containing newlines now use cat() so they render correctly.
cat("\n=== VARIABLES CREATED ===\n")
print(target_vars)

# Check for missing values
for (var in target_vars) {
  n_missing <- sum(is.na(data[[var]]))
  pct_missing <- round(100 * n_missing / nrow(data), 2)
  print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing))
}

# Quality check: Display sample rows
cat("\n=== QUALITY CHECK: Sample Calculations ===\n")
sample_rows <- sample(1:nrow(data), min(5, nrow(data)))
for (i in sample_rows) {
  cat(sprintf("\nRow %d:\n", i))
  print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f",
    data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i]))
  print(sprintf(" → X5_10DGEN_past_pref = %.2f (expected: %.2f)",
    data$X5_10DGEN_past_pref[i],
    abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i])))
  print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f",
    data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i]))
  print(sprintf(" → X5_10DGEN_fut_pers = %.2f (expected: %.2f)",
    data$X5_10DGEN_fut_pers[i],
    abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i])))
}

# Descriptive statistics (n, mean, sd, min, max per created variable)
cat("\n=== DESCRIPTIVE STATISTICS ===\n")
desc_stats <- data %>%
  summarise(across(all_of(target_vars),
    list(n = ~sum(!is.na(.)),
         mean = ~round(mean(., na.rm = TRUE), 5),
         sd = ~round(sd(., na.rm = TRUE), 5),
         min = ~round(min(., na.rm = TRUE), 5),
         max = ~round(max(., na.rm = TRUE), 5)),
    .names = "{.col}_{.fn}"))
print(t(desc_stats))

# Save to CSV
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\n=== PROCESSING COMPLETE ===\n")
print("Data saved to eohi2.csv")
print(paste("Total columns now:", ncol(data)))

View File

@ -0,0 +1,223 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# REFACTOR: the original spelled out 11 rowMeans calls and 11 QA checks with
# hand-copied 15/30-column vectors. All column sets are now derived from one
# item list, so the sets cannot drift out of sync. Created columns, values,
# and console output are unchanged.

# The 15 item suffixes shared by every time-interval prefix
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# Helper: the 15 full column names for one time-interval prefix
cols_for <- function(prefix) paste0(prefix, "_", items)

# Mean-variable definitions: target column -> source columns.
# Sets 1-6 average one 15-item interval; sets 7-11 average two intervals (30 items).
mean_sets <- list(
  NPast_5_mean      = cols_for("NPast_5"),
  NPast_10_mean     = cols_for("NPast_10"),
  NFut_5_mean       = cols_for("NFut_5"),
  NFut_10_mean      = cols_for("NFut_10"),
  X5.10past_mean    = cols_for("X5.10past"),
  X5.10fut_mean     = cols_for("X5.10fut"),
  NPast_global_mean = c(cols_for("NPast_5"), cols_for("NPast_10")),
  NFut_global_mean  = c(cols_for("NFut_5"), cols_for("NFut_10")),
  X5.10_global_mean = c(cols_for("X5.10past"), cols_for("X5.10fut")),
  N5_global_mean    = c(cols_for("NPast_5"), cols_for("NFut_5")),
  N10_global_mean   = c(cols_for("NPast_10"), cols_for("NFut_10"))
)

# Compute each mean column (row-wise mean, NAs ignored)
for (target in names(mean_sets)) {
  data[[target]] <- rowMeans(data[, mean_sets[[target]]], na.rm = TRUE)
}

# Save the data
write.csv(data, "eohi2.csv", row.names = FALSE)

# ===== QA CODE: Check first 5 rows =====
# Recompute every mean independently and compare to the stored value
# (all.equal tolerates floating-point rounding).
cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n")
for (i in 1:min(5, nrow(data))) {
  cat("--- Row", i, "---\n")
  for (target in names(mean_sets)) {
    calc <- mean(as.numeric(data[i, mean_sets[[target]]]), na.rm = TRUE)
    # The last set ends each row's report with a blank line, as before
    eol <- if (target == "N10_global_mean") "\n\n" else "\n"
    cat(paste0(target, ": Calculated ="), calc, "| Stored =", data[[target]][i],
        "| Match:", isTRUE(all.equal(calc, data[[target]][i])), eol)
  }
}
cat("=== QA CHECK COMPLETE ===\n")

View File

@ -0,0 +1,115 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# REFACTOR: the 7 rowMeans calls and the 7-way copy-pasted QA loop are driven
# from a single named list, so each column set is defined exactly once.
# Created columns, values, and console output are unchanged.
mean_sets <- list(
  # Set 1: mean of the three past 5-10 difference domains
  DGEN_past_5.10_mean = c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val"),
  # Set 2: mean of the three future 5-10 difference domains
  DGEN_fut_5.10_mean = c("X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val"),
  # Set 3: all nine past DGEN variables
  DGENpast_global_mean = c(
    "DGEN_past_5_Pref", "DGEN_past_10_Pref", "X5_10DGEN_past_pref",
    "DGEN_past_5_Pers", "DGEN_past_10_Pers", "X5_10DGEN_past_pers",
    "DGEN_past_5_Val", "DGEN_past_10_Val", "X5_10DGEN_past_val"
  ),
  # Set 4: all nine future DGEN variables
  DGENfut_global_mean = c(
    "DGEN_fut_5_Pref", "DGEN_fut_10_Pref", "X5_10DGEN_fut_pref",
    "DGEN_fut_5_Pers", "DGEN_fut_10_Pers", "X5_10DGEN_fut_pers",
    "DGEN_fut_5_Val", "DGEN_fut_10_Val", "X5_10DGEN_fut_val"
  ),
  # Set 5: the six 5-year DGEN variables (past + future)
  DGEN_5_global_mean = c(
    "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
    "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val"
  ),
  # Set 6: the six 10-year DGEN variables (past + future)
  DGEN_10_global_mean = c(
    "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
    "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"
  ),
  # Set 7: the six 5-10 difference variables (past + future)
  DGEN_5.10_global_mean = c(
    "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
    "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val"
  )
)

# Compute each mean column (row-wise mean, NAs ignored)
for (target in names(mean_sets)) {
  data[[target]] <- rowMeans(data[, mean_sets[[target]]], na.rm = TRUE)
}

# Save the data
write.csv(data, "eohi2.csv", row.names = FALSE)

# ===== QA CODE: Check first 5 rows =====
# Recompute every mean independently and compare to the stored value
# (all.equal tolerates floating-point rounding).
cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n")
for (i in 1:min(5, nrow(data))) {
  cat("--- Row", i, "---\n")
  for (target in names(mean_sets)) {
    calc <- mean(as.numeric(data[i, mean_sets[[target]]]), na.rm = TRUE)
    # The last set ends each row's report with a blank line, as before
    eol <- if (target == "DGEN_5.10_global_mean") "\n\n" else "\n"
    cat(paste0(target, ": Calculated ="), calc, "| Stored =", data[[target]][i],
        "| Match:", isTRUE(all.equal(calc, data[[target]][i])), eol)
  }
}
cat("=== QA CHECK COMPLETE ===\n")

View File

@ -0,0 +1,235 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Create EHI difference variables (past rating minus future rating) for every
# item, in three time frames: 5-year, 10-year, and the 5-to-10-year change.
# Item names are "<domain>_<variable>", grouped by domain in the same order the
# original columns were laid out: preferences, personality, values.
ehi_items <- c(
  paste0("pref_", c("read", "music", "TV", "nap", "travel")),
  paste0("pers_", c("extravert", "critical", "dependable", "anxious", "complex")),
  paste0("val_", c("obey", "trad", "opinion", "performance", "justice"))
)

# === 5-YEAR PAST-FUTURE PAIRS ===
# ehi5_<item> = NPast_5_<item> - NFut_5_<item>
for (item in ehi_items) {
  data[[paste0("ehi5_", item)]] <-
    data[[paste0("NPast_5_", item)]] - data[[paste0("NFut_5_", item)]]
}

# === 10-YEAR PAST-FUTURE PAIRS ===
# ehi10_<item> = NPast_10_<item> - NFut_10_<item>
for (item in ehi_items) {
  data[[paste0("ehi10_", item)]] <-
    data[[paste0("NPast_10_", item)]] - data[[paste0("NFut_10_", item)]]
}

# === 5-10 YEAR CHANGE VARIABLES ===
# ehi5.10_<item> = X5.10past_<item> - X5.10fut_<item>
for (item in ehi_items) {
  data[[paste0("ehi5.10_", item)]] <-
    data[[paste0("X5.10past_", item)]] - data[[paste0("X5.10fut_", item)]]
}
# QA: Verify calculations - FIRST 5 ROWS with detailed output
# Spot-check of the newly created ehi5* / ehi10* / ehi5.10* difference columns:
# for each of the first 5 rows, the past-minus-future difference is recomputed
# inline and compared against the stored column; "✓"/"✗" marks agreement within
# a 1e-10 floating-point tolerance. Only a few representative items are printed
# here — the exhaustive check over all rows and all variables follows below.
cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")
cat("--- 5-YEAR VARIABLES ---\n")
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
# Representative items: two preferences and one personality variable.
cat(sprintf(" pref_read: %g - %g = %g | ehi5_pref_read = %g %s\n",
data$NPast_5_pref_read[i], data$NFut_5_pref_read[i],
data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i],
data$ehi5_pref_read[i],
ifelse(abs((data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i]) - data$ehi5_pref_read[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pref_music: %g - %g = %g | ehi5_pref_music = %g %s\n",
data$NPast_5_pref_music[i], data$NFut_5_pref_music[i],
data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i],
data$ehi5_pref_music[i],
ifelse(abs((data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i]) - data$ehi5_pref_music[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pers_extravert: %g - %g = %g | ehi5_pers_extravert = %g %s\n",
data$NPast_5_pers_extravert[i], data$NFut_5_pers_extravert[i],
data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i],
data$ehi5_pers_extravert[i],
ifelse(abs((data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i]) - data$ehi5_pers_extravert[i]) < 1e-10, "✓", "✗")))
}
cat("\n--- 10-YEAR VARIABLES ---\n")
# Same spot-check for the 10-year difference columns.
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
cat(sprintf(" pref_read: %g - %g = %g | ehi10_pref_read = %g %s\n",
data$NPast_10_pref_read[i], data$NFut_10_pref_read[i],
data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i],
data$ehi10_pref_read[i],
ifelse(abs((data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i]) - data$ehi10_pref_read[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pref_music: %g - %g = %g | ehi10_pref_music = %g %s\n",
data$NPast_10_pref_music[i], data$NFut_10_pref_music[i],
data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i],
data$ehi10_pref_music[i],
ifelse(abs((data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i]) - data$ehi10_pref_music[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pers_extravert: %g - %g = %g | ehi10_pers_extravert = %g %s\n",
data$NPast_10_pers_extravert[i], data$NFut_10_pers_extravert[i],
data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i],
data$ehi10_pers_extravert[i],
ifelse(abs((data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i]) - data$ehi10_pers_extravert[i]) < 1e-10, "✓", "✗")))
}
cat("\n--- 5-10 YEAR CHANGE VARIABLES ---\n")
# Same spot-check for the 5-to-10-year change difference columns.
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
cat(sprintf(" pref_read: %g - %g = %g | ehi5.10_pref_read = %g %s\n",
data$X5.10past_pref_read[i], data$X5.10fut_pref_read[i],
data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i],
data$ehi5.10_pref_read[i],
ifelse(abs((data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i]) - data$ehi5.10_pref_read[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pref_music: %g - %g = %g | ehi5.10_pref_music = %g %s\n",
data$X5.10past_pref_music[i], data$X5.10fut_pref_music[i],
data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i],
data$ehi5.10_pref_music[i],
ifelse(abs((data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i]) - data$ehi5.10_pref_music[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pers_extravert: %g - %g = %g | ehi5.10_pers_extravert = %g %s\n",
data$X5.10past_pers_extravert[i], data$X5.10fut_pers_extravert[i],
data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i],
data$ehi5.10_pers_extravert[i],
ifelse(abs((data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i]) - data$ehi5.10_pers_extravert[i]) < 1e-10, "✓", "✗")))
}
# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

# Item names ("<domain>_<variable>") shared by every time frame, in the same
# order the difference variables were created.
item_names <- c(
  paste0("pref_", c("read", "music", "TV", "nap", "travel")),
  paste0("pers_", c("extravert", "critical", "dependable", "anxious", "complex")),
  paste0("val_", c("obey", "trad", "opinion", "performance", "justice"))
)

# Build the (past, future, difference) column triples for each time frame,
# ordered 5-year, then 10-year, then 5-10 change — matching creation order.
qa_pairs <- c(
  lapply(item_names, function(it) {
    list(npast = paste0("NPast_5_", it), nfut = paste0("NFut_5_", it),
         target = paste0("ehi5_", it))
  }),
  lapply(item_names, function(it) {
    list(npast = paste0("NPast_10_", it), nfut = paste0("NFut_10_", it),
         target = paste0("ehi10_", it))
  }),
  lapply(item_names, function(it) {
    list(npast = paste0("X5.10past_", it), nfut = paste0("X5.10fut_", it),
         target = paste0("ehi5.10_", it))
  })
)

all_checks_passed <- TRUE
for (p in qa_pairs) {
  # Recompute the expected difference and compare with the stored column,
  # tolerating floating-point rounding error up to 1e-10.
  want <- data[[p$npast]] - data[[p$nfut]]
  got <- data[[p$target]]
  bad_rows <- which(abs(want - got) > 1e-10)
  if (length(bad_rows) == 0) {
    cat(sprintf("PASS: %s (n = %d)\n", p$target, nrow(data)))
    next
  }
  cat(sprintf("FAIL: %s\n", p$target))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(bad_rows),
              paste(head(bad_rows, 10), collapse = ", ")))
  # Show first discrepancy details
  r <- bad_rows[1]
  cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
              r,
              p$npast, data[[p$npast]][r],
              p$nfut, data[[p$nfut]][r],
              want[r],
              p$target, got[r]))
  all_checks_passed <- FALSE
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (overwrites the input file in place)
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")

View File

@ -0,0 +1,118 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Create DGEN EHI difference scores: past composite minus future composite,
# per domain (Preferences, Personality, Values).
# === 5-YEAR DGEN PAST-FUTURE PAIRS ===
for (domain in c("Pref", "Pers", "Val")) {
  data[[paste0("ehiDGEN_5_", domain)]] <-
    data[[paste0("DGEN_past_5_", domain)]] - data[[paste0("DGEN_fut_5_", domain)]]
}
# === 10-YEAR DGEN PAST-FUTURE PAIRS ===
for (domain in c("Pref", "Pers", "Val")) {
  data[[paste0("ehiDGEN_10_", domain)]] <-
    data[[paste0("DGEN_past_10_", domain)]] - data[[paste0("DGEN_fut_10_", domain)]]
}
# QA: Verify calculations - FIRST 5 ROWS with detailed output
# Spot-check of the ehiDGEN_* difference columns: for each of the first 5 rows,
# past minus future is recomputed inline and compared with the stored column;
# "✓"/"✗" marks agreement within a 1e-10 floating-point tolerance.
cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")
cat("--- 5-YEAR DGEN VARIABLES ---\n")
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_5_Pref = %g %s\n",
data$DGEN_past_5_Pref[i], data$DGEN_fut_5_Pref[i],
data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i],
data$ehiDGEN_5_Pref[i],
ifelse(abs((data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i]) - data$ehiDGEN_5_Pref[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_5_Pers = %g %s\n",
data$DGEN_past_5_Pers[i], data$DGEN_fut_5_Pers[i],
data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i],
data$ehiDGEN_5_Pers[i],
ifelse(abs((data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i]) - data$ehiDGEN_5_Pers[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" Val: %g - %g = %g | ehiDGEN_5_Val = %g %s\n",
data$DGEN_past_5_Val[i], data$DGEN_fut_5_Val[i],
data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i],
data$ehiDGEN_5_Val[i],
ifelse(abs((data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i]) - data$ehiDGEN_5_Val[i]) < 1e-10, "✓", "✗")))
}
cat("\n--- 10-YEAR DGEN VARIABLES ---\n")
# Same spot-check for the 10-year DGEN difference columns.
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_10_Pref = %g %s\n",
data$DGEN_past_10_Pref[i], data$DGEN_fut_10_Pref[i],
data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i],
data$ehiDGEN_10_Pref[i],
ifelse(abs((data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i]) - data$ehiDGEN_10_Pref[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_10_Pers = %g %s\n",
data$DGEN_past_10_Pers[i], data$DGEN_fut_10_Pers[i],
data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i],
data$ehiDGEN_10_Pers[i],
ifelse(abs((data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i]) - data$ehiDGEN_10_Pers[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" Val: %g - %g = %g | ehiDGEN_10_Val = %g %s\n",
data$DGEN_past_10_Val[i], data$DGEN_fut_10_Val[i],
data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i],
data$ehiDGEN_10_Val[i],
ifelse(abs((data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i]) - data$ehiDGEN_10_Val[i]) < 1e-10, "✓", "✗")))
}
# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

# Build the (past, future, difference) column triples for both time frames,
# in creation order: 5-year Pref/Pers/Val, then 10-year Pref/Pers/Val.
qa_pairs <- unlist(lapply(c("5", "10"), function(iv) {
  lapply(c("Pref", "Pers", "Val"), function(d) {
    list(npast = paste0("DGEN_past_", iv, "_", d),
         nfut = paste0("DGEN_fut_", iv, "_", d),
         target = paste0("ehiDGEN_", iv, "_", d))
  })
}), recursive = FALSE)

all_checks_passed <- TRUE
for (p in qa_pairs) {
  # Recompute the expected difference and compare with the stored column,
  # tolerating floating-point rounding error up to 1e-10.
  want <- data[[p$npast]] - data[[p$nfut]]
  got <- data[[p$target]]
  bad_rows <- which(abs(want - got) > 1e-10)
  if (length(bad_rows) == 0) {
    cat(sprintf("PASS: %s (n = %d)\n", p$target, nrow(data)))
    next
  }
  cat(sprintf("FAIL: %s\n", p$target))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(bad_rows),
              paste(head(bad_rows, 10), collapse = ", ")))
  # Show first discrepancy details
  r <- bad_rows[1]
  cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
              r,
              p$npast, data[[p$npast]][r],
              p$nfut, data[[p$nfut]][r],
              want[r],
              p$target, got[r]))
  all_checks_passed <- FALSE
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (overwrites the input file in place)
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")

View File

@ -0,0 +1,161 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Calculate domain-specific mean scores for EHI variables: each *_MEAN column
# is the row-wise average (NA-tolerant) of its five item-level difference
# scores. Columns are created in the same order as before: for each interval
# (5-year, 10-year, 5-10-year change), preferences, then personality, then
# values.
domain_items <- list(
  pref = c("read", "music", "TV", "nap", "travel"),
  pers = c("extravert", "critical", "dependable", "anxious", "complex"),
  val  = c("obey", "trad", "opinion", "performance", "justice")
)
for (interval in c("5", "10", "5.10")) {
  for (domain in names(domain_items)) {
    component_cols <- paste0("ehi", interval, "_", domain, "_", domain_items[[domain]])
    data[[paste0("ehi", interval, "_", domain, "_MEAN")]] <-
      rowMeans(data[, component_cols], na.rm = TRUE)
  }
}
# QA: Verify mean calculations
# Spot-check of four of the nine *_MEAN columns: for each of the first 5 rows,
# the mean of the component values is recomputed with mean(..., na.rm = TRUE)
# and compared with the stored column; "✓"/"✗" marks agreement within a 1e-10
# floating-point tolerance. The exhaustive all-rows check follows below.
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI domain-specific mean calculations\n\n")
cat("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5_pref_read[i], data$ehi5_pref_music[i],
data$ehi5_pref_TV[i], data$ehi5_pref_nap[i],
data$ehi5_pref_travel[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5_pref_MEAN[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3], vals[4], vals[5],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5_pers_extravert[i], data$ehi5_pers_critical[i],
data$ehi5_pers_dependable[i], data$ehi5_pers_anxious[i],
data$ehi5_pers_complex[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5_pers_MEAN[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3], vals[4], vals[5],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi10_pref_read[i], data$ehi10_pref_music[i],
data$ehi10_pref_TV[i], data$ehi10_pref_nap[i],
data$ehi10_pref_travel[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi10_pref_MEAN[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3], vals[4], vals[5],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5.10_pref_read[i], data$ehi5.10_pref_music[i],
data$ehi5.10_pref_TV[i], data$ehi5.10_pref_nap[i],
data$ehi5.10_pref_travel[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5.10_pref_MEAN[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3], vals[4], vals[5],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Display labels used in the PASS/FAIL messages, keyed by interval code and
# domain code, plus the item lists that make up each domain.
interval_labels <- c("5" = "5-Year", "10" = "10-Year", "5.10" = "5-10 Year Change")
domain_labels <- c(pref = "Preferences", pers = "Personality", val = "Values")
domain_items <- list(
  pref = c("read", "music", "TV", "nap", "travel"),
  pers = c("extravert", "critical", "dependable", "anxious", "complex"),
  val  = c("obey", "trad", "opinion", "performance", "justice")
)

# Rebuild the (component columns, target column, display name) triples in the
# same order the *_MEAN columns were created.
qa_checks <- list()
for (iv in names(interval_labels)) {
  for (d in names(domain_labels)) {
    qa_checks[[length(qa_checks) + 1]] <- list(
      vars = paste0("ehi", iv, "_", d, "_", domain_items[[d]]),
      target = paste0("ehi", iv, "_", d, "_MEAN"),
      name = paste(interval_labels[[iv]], domain_labels[[d]])
    )
  }
}

all_checks_passed <- TRUE
for (chk in qa_checks) {
  # Recompute the row-wise mean and compare with the stored column,
  # tolerating floating-point rounding error up to 1e-10.
  recomputed <- rowMeans(data[, chk$vars], na.rm = TRUE)
  stored <- data[[chk$target]]
  mismatches <- which(abs(recomputed - stored) > 1e-10)
  if (length(mismatches) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                chk$name, length(chk$vars), nrow(data)))
    next
  }
  cat(sprintf("FAIL: %s mean (n_vars = %d)\n", chk$name, length(chk$vars)))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(mismatches),
              paste(head(mismatches, 10), collapse = ", ")))
  all_checks_passed <- FALSE
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (overwrites the input file in place)
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")

View File

@ -0,0 +1,140 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Calculate global mean scores for EHI variables: each target column is the
# row-wise average (NA-tolerant) of its three component columns. The list is
# ordered exactly as the columns were originally created.
global_specs <- list(
  ehiDGEN_5_mean = c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
  ehiDGEN_10_mean = c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
  ehi5_global_mean = c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
  ehi10_global_mean = c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
  ehi5.10_global_mean = c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN")
)
for (target in names(global_specs)) {
  data[[target]] <- rowMeans(data[, global_specs[[target]]], na.rm = TRUE)
}
# QA: Verify mean calculations
# Spot-check of the five global-mean columns: for each of the first 5 rows, the
# mean of the three component values is recomputed with mean(..., na.rm = TRUE)
# and compared with the stored column; "✓"/"✗" marks agreement within a 1e-10
# floating-point tolerance. The exhaustive all-rows check follows below.
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI global mean calculations\n\n")
cat("--- FIRST 5 ROWS: DGEN 5-YEAR GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehiDGEN_5_Pref[i], data$ehiDGEN_5_Pers[i],
data$ehiDGEN_5_Val[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehiDGEN_5_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: DGEN 10-YEAR GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehiDGEN_10_Pref[i], data$ehiDGEN_10_Pers[i],
data$ehiDGEN_10_Val[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehiDGEN_10_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 5-YEAR GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5_pref_MEAN[i], data$ehi5_pers_MEAN[i],
data$ehi5_val_MEAN[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5_global_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 10-YEAR GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi10_pref_MEAN[i], data$ehi10_pers_MEAN[i],
data$ehi10_val_MEAN[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi10_global_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5.10_pref_MEAN[i], data$ehi5.10_pers_MEAN[i],
data$ehi5.10_val_MEAN[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5.10_global_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Each entry: the component columns, the stored global-mean column they feed,
# and the display name used in the PASS/FAIL messages.
qa_checks <- list(
  # DGEN global means
  list(vars = c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
       target = "ehiDGEN_5_mean", name = "DGEN 5-Year Global"),
  list(vars = c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
       target = "ehiDGEN_10_mean", name = "DGEN 10-Year Global"),
  # Domain-specific global means
  list(vars = c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
       target = "ehi5_global_mean", name = "5-Year Global"),
  list(vars = c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
       target = "ehi10_global_mean", name = "10-Year Global"),
  list(vars = c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"),
       target = "ehi5.10_global_mean", name = "5-10 Year Change Global")
)

all_checks_passed <- TRUE
for (chk in qa_checks) {
  # Recompute the row-wise mean and compare with the stored column,
  # tolerating floating-point rounding error up to 1e-10.
  recomputed <- rowMeans(data[, chk$vars], na.rm = TRUE)
  stored <- data[[chk$target]]
  mismatches <- which(abs(recomputed - stored) > 1e-10)
  if (length(mismatches) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                chk$name, length(chk$vars), nrow(data)))
    next
  }
  cat(sprintf("FAIL: %s mean (n_vars = %d)\n", chk$name, length(chk$vars)))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(mismatches),
              paste(head(mismatches, 10), collapse = ", ")))
  all_checks_passed <- FALSE
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (overwrites the input file in place)
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")

View File

@ -0,0 +1,38 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
data <- read.csv("eohi2.csv")

# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))

# Also show the unique values and their frequencies.
# BUG FIX: the original used print("\nUnique values and frequencies:"), which
# displays the "\n" escape literally (with a [1] prefix and quotes) instead of
# emitting a blank line; cat() renders the newline as intended.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))

# Recode demo_edu into 3 ordinal levels.
# Any demo_edu label not listed below (or NA) remains NA in edu3; the
# cross-tab printed at the end makes such unmapped labels visible.
data$edu3 <- NA
# HS_TS: High School and Trade School
data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS"
# C_Ug: College and University - Undergraduate
data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug"
# grad_prof: University - Graduate, University - PhD, and Professional Degree
data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof"

# Convert to ordered factor: HS_TS < C_Ug < grad_prof
data$edu3 <- factor(data$edu3,
                    levels = c("HS_TS", "C_Ug", "grad_prof"),
                    ordered = TRUE)

# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))
# Verify the recoding: cross-tabulate original labels against the new levels
print(table(data$demo_edu, data$edu3, useNA = "ifany"))

# Save the updated dataset with the new edu3 variable (overwrites input file)
write.csv(data, "eohi2.csv", row.names = FALSE)

View File

@ -0,0 +1,100 @@
# Standardize four EHI composite scores (z-scores), average them into
# stdEHI_mean, spot-check the arithmetic on 10 random rows, and save
# the updated dataset back to eohi2.csv.
options(scipen = 999)
library(dplyr)  # loaded for interactive follow-up; not used below
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
df <- read.csv("eohi2.csv")
# Display means and standard deviations of non-standardized variables for manual checking
print(round(mean(df$ehiDGEN_5_mean, na.rm = TRUE), 5))
print(round(sd(df$ehiDGEN_5_mean, na.rm = TRUE), 5))
print(round(mean(df$ehiDGEN_10_mean, na.rm = TRUE), 5))
print(round(sd(df$ehiDGEN_10_mean, na.rm = TRUE), 5))
print(round(mean(df$ehi5_global_mean, na.rm = TRUE), 5))
print(round(sd(df$ehi5_global_mean, na.rm = TRUE), 5))
print(round(mean(df$ehi10_global_mean, na.rm = TRUE), 5))
print(round(sd(df$ehi10_global_mean, na.rm = TRUE), 5))
# Calculate means and standard deviations for standardization
# (kept as named scalars so the row-level spot checks below can reuse them)
mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE)
sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE)
mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE)
sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE)
mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE)
sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE)
mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE)
sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE)
# Create standardized variables: z = (x - mean) / sd
df$stdDGEN_5 <- (df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5
df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10
df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5
df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10
# Check that variables have been standardized (mean ~ 0, sd ~ 1)
print(round(mean(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(mean(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(mean(df$stdDS_5, na.rm = TRUE), 5))
print(round(sd(df$stdDS_5, na.rm = TRUE), 5))
print(round(mean(df$stdDS_10, na.rm = TRUE), 5))
print(round(sd(df$stdDS_10, na.rm = TRUE), 5))
# Calculate mean of the four standardized variables per row
df$stdEHI_mean <- rowMeans(df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")], na.rm = TRUE)
#### check random 10 rows
# Check 10 random rows to verify calculations
set.seed(123) # For reproducible random selection
random_rows <- sample(nrow(df), 10)
cat("Checking 10 random rows:\n")
cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n")
cat("----|----------------|-----------|-------------|-----------------|------------|------------\n")
for(i in random_rows) {
  orig_5 <- df$ehiDGEN_5_mean[i]
  std_5 <- df$stdDGEN_5[i]
  calc_5 <- (orig_5 - mean_DGEN_5) / sd_DGEN_5
  orig_10 <- df$ehiDGEN_10_mean[i]
  std_10 <- df$stdDGEN_10[i]
  calc_10 <- (orig_10 - mean_DGEN_10) / sd_DGEN_10
  cat(sprintf("%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n",
              i, orig_5, std_5, calc_5, orig_10, std_10, calc_10))
}
cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n")
cat("----|------------------|---------|-------------|-------------------|----------|------------\n")
for(i in random_rows) {
  orig_5 <- df$ehi5_global_mean[i]
  std_5 <- df$stdDS_5[i]
  calc_5 <- (orig_5 - mean_DS_5) / sd_DS_5
  orig_10 <- df$ehi10_global_mean[i]
  std_10 <- df$stdDS_10[i]
  calc_10 <- (orig_10 - mean_DS_10) / sd_DS_10
  cat(sprintf("%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n",
              i, orig_5, std_5, calc_5, orig_10, std_10, calc_10))
}
# Show the final stdEHI_mean for these rows against an independent
# recomputation. BUG FIX: the previous version assigned manual_mean a
# sum of stray pasted-in numeric literals and discarded the mean(...)
# call on the next line, so every row compared against the same wrong
# constant; manual_mean must be the per-row mean itself.
cat("\nRow | stdEHI_mean | Manual calc\n")
cat("----|-------------|------------\n")
for(i in random_rows) {
  manual_mean <- mean(c(df$stdDGEN_5[i], df$stdDGEN_10[i], df$stdDS_5[i], df$stdDS_10[i]), na.rm = TRUE)
  cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean))
}
# Write to CSV
write.csv(df, "eohi2.csv", row.names = FALSE)

View File

@ -0,0 +1,24 @@
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
"ehiDGEN_5_Pref",1,489,0.06339,1.9954,0,0.03308,1.4826,-9,9,18,0.27576,4.00366,0.09024
"ehiDGEN_5_Pers",2,489,0.09816,1.85725,0,0.1145,1.4826,-9,7,16,-0.18711,3.23028,0.08399
"ehiDGEN_5_Val",3,489,0.02454,2.16042,0,0.06361,1.4826,-9,10,19,0.1765,4.44483,0.0977
"ehiDGEN_10_Pref",4,489,0.20245,2.29618,0,0.18575,1.4826,-10,9,19,0.01558,3.90242,0.10384
"ehiDGEN_10_Pers",5,489,0.29448,2.34328,0,0.24936,1.4826,-9,10,19,0.06997,2.70639,0.10597
"ehiDGEN_10_Val",6,489,0.40491,2.32319,0,0.32061,1.4826,-8,10,18,0.37025,2.73426,0.10506
"ehi5_pref_MEAN",7,489,0.07607,0.64562,0,0.05954,0.29652,-4.4,4,8.4,0.13291,10.10075,0.0292
"ehi5_pers_MEAN",8,489,0.07454,0.74495,0,0.05267,0.59304,-2.4,3.6,6,0.58141,3.07505,0.03369
"ehi5_val_MEAN",9,489,0.05194,0.58562,0,0.02748,0.29652,-2.6,3,5.6,0.62482,5.46128,0.02648
"ehi10_pref_MEAN",10,489,0.10348,0.69093,0,0.09059,0.59304,-3.6,3.6,7.2,0.02685,4.20962,0.03124
"ehi10_pers_MEAN",11,489,0.12168,0.75889,0,0.09389,0.59304,-3.6,3.8,7.4,0.14345,3.87988,0.03432
"ehi10_val_MEAN",12,489,0.14397,0.71372,0,0.08193,0.29652,-2.4,4,6.4,1.25911,5.60017,0.03228
"ehi5.10_pref_MEAN",13,489,0.13538,0.70635,0,0.12214,0.29652,-5.2,2.6,7.8,-0.87659,8.98815,0.03194
"ehi5.10_pers_MEAN",14,489,0.18773,0.77513,0,0.16489,0.59304,-3.6,3.4,7,0.18903,2.50653,0.03505
"ehi5.10_val_MEAN",15,489,0.12229,0.73027,0,0.08193,0.29652,-2.8,3.6,6.4,0.68131,4.79817,0.03302
"ehiDGEN_5_mean",16,489,0.06203,1.45735,0,0.06361,0.9884,-7.66667,5.66667,13.33333,-0.33245,3.67417,0.0659
"ehiDGEN_10_mean",17,489,0.30061,1.89245,0,0.28329,0.9884,-8,9,17,0.01127,3.38489,0.08558
"ehi5_global_mean",18,489,0.06752,0.46201,0,0.0486,0.29652,-1.93333,3.33333,5.26667,0.93595,7.08224,0.02089
"ehi10_global_mean",19,489,0.12304,0.52522,0,0.08494,0.29652,-2.46667,2.33333,4.8,0.55734,3.94213,0.02375
"ehi5.10_global_mean",20,489,0.14847,0.50536,0.06667,0.12231,0.29652,-1.93333,2.8,4.73333,0.67593,4.05776,0.02285
"aot_total",21,489,0.20475,0.50227,0.125,0.18861,0.37065,-2,1.75,3.75,0.06455,1.92235,0.02271
"crt_correct",22,489,0.26858,0.35082,0,0.21204,0,0,1,1,0.97116,-0.45636,0.01586
"crt_int",23,489,0.67212,0.3564,0.66667,0.71416,0.4942,0,1,1,-0.65482,-0.92392,0.01612
1 vars n mean sd median trimmed mad min max range skew kurtosis se
1 vars n mean sd median trimmed mad min max range skew kurtosis se
2 ehiDGEN_5_Pref 1 489 0.06339 1.9954 0 0.03308 1.4826 -9 9 18 0.27576 4.00366 0.09024
3 ehiDGEN_5_Pers 2 489 0.09816 1.85725 0 0.1145 1.4826 -9 7 16 -0.18711 3.23028 0.08399
4 ehiDGEN_5_Val 3 489 0.02454 2.16042 0 0.06361 1.4826 -9 10 19 0.1765 4.44483 0.0977
5 ehiDGEN_10_Pref 4 489 0.20245 2.29618 0 0.18575 1.4826 -10 9 19 0.01558 3.90242 0.10384
6 ehiDGEN_10_Pers 5 489 0.29448 2.34328 0 0.24936 1.4826 -9 10 19 0.06997 2.70639 0.10597
7 ehiDGEN_10_Val 6 489 0.40491 2.32319 0 0.32061 1.4826 -8 10 18 0.37025 2.73426 0.10506
8 ehi5_pref_MEAN 7 489 0.07607 0.64562 0 0.05954 0.29652 -4.4 4 8.4 0.13291 10.10075 0.0292
9 ehi5_pers_MEAN 8 489 0.07454 0.74495 0 0.05267 0.59304 -2.4 3.6 6 0.58141 3.07505 0.03369
10 ehi5_val_MEAN 9 489 0.05194 0.58562 0 0.02748 0.29652 -2.6 3 5.6 0.62482 5.46128 0.02648
11 ehi10_pref_MEAN 10 489 0.10348 0.69093 0 0.09059 0.59304 -3.6 3.6 7.2 0.02685 4.20962 0.03124
12 ehi10_pers_MEAN 11 489 0.12168 0.75889 0 0.09389 0.59304 -3.6 3.8 7.4 0.14345 3.87988 0.03432
13 ehi10_val_MEAN 12 489 0.14397 0.71372 0 0.08193 0.29652 -2.4 4 6.4 1.25911 5.60017 0.03228
14 ehi5.10_pref_MEAN 13 489 0.13538 0.70635 0 0.12214 0.29652 -5.2 2.6 7.8 -0.87659 8.98815 0.03194
15 ehi5.10_pers_MEAN 14 489 0.18773 0.77513 0 0.16489 0.59304 -3.6 3.4 7 0.18903 2.50653 0.03505
16 ehi5.10_val_MEAN 15 489 0.12229 0.73027 0 0.08193 0.29652 -2.8 3.6 6.4 0.68131 4.79817 0.03302
17 ehiDGEN_5_mean 16 489 0.06203 1.45735 0 0.06361 0.9884 -7.66667 5.66667 13.33333 -0.33245 3.67417 0.0659
18 ehiDGEN_10_mean 17 489 0.30061 1.89245 0 0.28329 0.9884 -8 9 17 0.01127 3.38489 0.08558
19 ehi5_global_mean 18 489 0.06752 0.46201 0 0.0486 0.29652 -1.93333 3.33333 5.26667 0.93595 7.08224 0.02089
20 ehi10_global_mean 19 489 0.12304 0.52522 0 0.08494 0.29652 -2.46667 2.33333 4.8 0.55734 3.94213 0.02375
21 ehi5.10_global_mean 20 489 0.14847 0.50536 0.06667 0.12231 0.29652 -1.93333 2.8 4.73333 0.67593 4.05776 0.02285
22 aot_total 21 489 0.20475 0.50227 0.125 0.18861 0.37065 -2 1.75 3.75 0.06455 1.92235 0.02271
23 crt_correct 22 489 0.26858 0.35082 0 0.21204 0 0 1 1 0.97116 -0.45636 0.01586
24 crt_int 23 489 0.67212 0.3564 0.66667 0.71416 0.4942 0 1 1 -0.65482 -0.92392 0.01612

View File

@ -0,0 +1,15 @@
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
"DGEN_past_5_mean",1,489,3.70279,2.64159,3.33333,3.57422,2.9652,0,10,10,0.33519,-0.92598,0.11946
"DGEN_past_10_mean",2,489,4.12747,2.69248,4,4.07209,3.4594,0,10,10,0.15303,-1.06997,0.12176
"DGEN_fut_5_mean",3,489,3.64076,2.72097,3,3.47498,2.9652,0,10,10,0.45879,-0.91272,0.12305
"DGEN_fut_10_mean",4,489,3.82686,2.67605,3.33333,3.6743,2.9652,0,10,10,0.41341,-0.85159,0.12101
"DGEN_past_5.10_mean",5,489,1.10907,1.14357,0.66667,0.93384,0.9884,0,6.66667,6.66667,1.38388,2.20547,0.05171
"DGEN_fut_5.10_mean",6,489,1.03272,1.16269,0.66667,0.83885,0.9884,0,7.33333,7.33333,1.82686,4.61137,0.05258
"DGENpast_global_mean",7,489,2.97978,1.86338,3.11111,2.96409,2.30627,0,6.66667,6.66667,0.05122,-1.1105,0.08427
"DGENfut_global_mean",8,489,2.83345,1.86526,2.66667,2.76788,2.30627,0,6.66667,6.66667,0.25867,-1.05797,0.08435
"DGEN_5_global_mean",9,489,3.67178,2.58067,3.16667,3.53859,2.9652,0,10,10,0.38068,-0.90478,0.1167
"DGEN_10_global_mean",10,489,3.97716,2.51197,3.83333,3.89992,2.9652,0,10,10,0.23861,-0.92036,0.1136
"DGEN_5.10_global_mean",11,489,1.07089,0.95646,0.83333,0.95335,0.9884,0,5,5,1.13335,1.28416,0.04325
"aot_total",12,489,0.20475,0.50227,0.125,0.18861,0.37065,-2,1.75,3.75,0.06455,1.92235,0.02271
"crt_correct",13,489,0.26858,0.35082,0,0.21204,0,0,1,1,0.97116,-0.45636,0.01586
"crt_int",14,489,0.67212,0.3564,0.66667,0.71416,0.4942,0,1,1,-0.65482,-0.92392,0.01612
1 vars n mean sd median trimmed mad min max range skew kurtosis se
1 vars n mean sd median trimmed mad min max range skew kurtosis se
2 DGEN_past_5_mean 1 489 3.70279 2.64159 3.33333 3.57422 2.9652 0 10 10 0.33519 -0.92598 0.11946
3 DGEN_past_10_mean 2 489 4.12747 2.69248 4 4.07209 3.4594 0 10 10 0.15303 -1.06997 0.12176
4 DGEN_fut_5_mean 3 489 3.64076 2.72097 3 3.47498 2.9652 0 10 10 0.45879 -0.91272 0.12305
5 DGEN_fut_10_mean 4 489 3.82686 2.67605 3.33333 3.6743 2.9652 0 10 10 0.41341 -0.85159 0.12101
6 DGEN_past_5.10_mean 5 489 1.10907 1.14357 0.66667 0.93384 0.9884 0 6.66667 6.66667 1.38388 2.20547 0.05171
7 DGEN_fut_5.10_mean 6 489 1.03272 1.16269 0.66667 0.83885 0.9884 0 7.33333 7.33333 1.82686 4.61137 0.05258
8 DGENpast_global_mean 7 489 2.97978 1.86338 3.11111 2.96409 2.30627 0 6.66667 6.66667 0.05122 -1.1105 0.08427
9 DGENfut_global_mean 8 489 2.83345 1.86526 2.66667 2.76788 2.30627 0 6.66667 6.66667 0.25867 -1.05797 0.08435
10 DGEN_5_global_mean 9 489 3.67178 2.58067 3.16667 3.53859 2.9652 0 10 10 0.38068 -0.90478 0.1167
11 DGEN_10_global_mean 10 489 3.97716 2.51197 3.83333 3.89992 2.9652 0 10 10 0.23861 -0.92036 0.1136
12 DGEN_5.10_global_mean 11 489 1.07089 0.95646 0.83333 0.95335 0.9884 0 5 5 1.13335 1.28416 0.04325
13 aot_total 12 489 0.20475 0.50227 0.125 0.18861 0.37065 -2 1.75 3.75 0.06455 1.92235 0.02271
14 crt_correct 13 489 0.26858 0.35082 0 0.21204 0 0 1 1 0.97116 -0.45636 0.01586
15 crt_int 14 489 0.67212 0.3564 0.66667 0.71416 0.4942 0 1 1 -0.65482 -0.92392 0.01612

Some files were not shown because too many files have changed in this diff Show More