eohi3-updates #3

Merged
ira merged 3 commits from eohi3-updates into master 2026-01-26 16:30:11 -05:00
178 changed files with 24078 additions and 0 deletions
Showing only changes of commit 4c3d96a61b - Show all commits

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.history

55
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,55 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        // Debug the whole open workspace with the R-Debugger extension.
        {
            "type": "R-Debugger",
            "name": "Launch R-Workspace",
            "request": "launch",
            "debugMode": "workspace",
            "workingDirectory": "${workspaceFolder}",
            "splitOverwrittenOutput": true
        },
        // Debug only the R file currently open in the editor (${file}),
        // without stopping on the first line.
        {
            "type": "R-Debugger",
            "name": "Debug R-File",
            "request": "launch",
            "debugMode": "file",
            "workingDirectory": "${workspaceFolder}",
            "file": "${file}",
            "splitOverwrittenOutput": true,
            "stopOnEntry": false
        },
        // Source the active file and debug its main() function only;
        // global debugging is disabled for this configuration.
        {
            "type": "R-Debugger",
            "name": "Debug R-Function",
            "request": "launch",
            "debugMode": "function",
            "workingDirectory": "${workspaceFolder}",
            "file": "${file}",
            "mainFunction": "main",
            "allowGlobalDebugging": false,
            "splitOverwrittenOutput": true
        },
        // Debug the workspace as an R package: loads the package at "."
        // and exposes package scopes in the debugger.
        {
            "type": "R-Debugger",
            "name": "Debug R-Package",
            "request": "launch",
            "debugMode": "workspace",
            "workingDirectory": "${workspaceFolder}",
            "includePackageScopes": true,
            "loadPackages": [
                "."
            ],
            "splitOverwrittenOutput": true
        },
        // Attach the debugger to an already running R process.
        {
            "type": "R-Debugger",
            "request": "attach",
            "name": "Attach to R process",
            "splitOverwrittenOutput": true
        }
    ]
}

BIN
anova tables.pptx Normal file

Binary file not shown.

Binary file not shown.

118
eohi1/BS_means.vb Normal file
View File

@ -0,0 +1,118 @@
Option Explicit
Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Locate headerName in row 1 of ws; returns its column number, or 0 when absent.
    Dim rightmost As Long
    rightmost = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    Dim hit As Variant
    hit = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, rightmost)), 0)
    If IsError(hit) Then
        ' Application.Match yields an error Variant when the header is missing.
        GetColIndex = 0
    Else
        GetColIndex = CLng(hit)
    End If
End Function
Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
    ' Collect the sheet column index of every header that actually exists,
    ' preserving order and skipping absent headers. Returns an empty Variant
    ' array when none of the headers are present.
    Dim found() As Long
    ReDim found(0 To UBound(headers))
    Dim hits As Long
    hits = 0
    Dim i As Long
    For i = LBound(headers) To UBound(headers)
        Dim idx As Long
        idx = GetColIndex(CStr(headers(i)), ws)
        If idx > 0 Then
            found(hits) = idx
            hits = hits + 1
        End If
    Next i
    If hits = 0 Then
        BuildPresentColArray = Array()
    Else
        ' Shrink in place instead of copying element by element.
        ReDim Preserve found(0 To hits - 1)
        BuildPresentColArray = found
    End If
End Function
Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
    ' Average the numeric cells of one row across the given columns.
    ' Error and non-numeric cells are skipped; returns #N/A when no numeric
    ' value is found (including when colIndexes is not an array).
    Dim total As Double
    Dim n As Long
    If IsArray(colIndexes) Then
        Dim k As Long
        For k = LBound(colIndexes) To UBound(colIndexes)
            Dim cellVal As Variant
            cellVal = ws.Cells(rowIndex, CLng(colIndexes(k))).Value
            ' Nested Ifs on purpose: VBA's And does not short-circuit, so
            ' IsNumeric must not be evaluated on an error value.
            If Not IsError(cellVal) Then
                If IsNumeric(cellVal) Then
                    total = total + CDbl(cellVal)
                    n = n + 1
                End If
            End If
        Next k
    End If
    If n = 0 Then
        MeanOfRow = CVErr(xlErrNA)
    Else
        MeanOfRow = total / n
    End If
End Function
Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
    ' Return the column holding headerName, creating it just after the last
    ' used header column when it does not exist yet.
    Dim col As Long
    col = GetColIndex(headerName, ws)
    If col = 0 Then
        col = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column + 1
        ws.Cells(1, col).Value = headerName
    End If
    EnsureOutputColumn = col
End Function
Sub BS_Means()
    ' Write per-participant Brier-score means into the first sheet:
    ' bs_28 (all 28 items), bs_easy (14 easy items), bs_hard (14 hard items).
    ' Output columns are created when missing; rows start at 2 (row 1 = headers).
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)

    Dim all28 As Variant
    all28 = Array("lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
                  "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
                  "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
                  "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim easy14 As Variant
    easy14 = Array("lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
                   "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy")
    Dim hard14 As Variant
    hard14 = Array("spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
                   "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")

    ' Resolve which of the item columns actually exist on the sheet.
    Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
    colsAll = BuildPresentColArray(all28, ws)
    colsEasy = BuildPresentColArray(easy14, ws)
    colsHard = BuildPresentColArray(hard14, ws)

    ' Ensure the three output columns exist (created in this order when absent).
    Dim outAll As Long, outEasy As Long, outHard As Long
    outAll = EnsureOutputColumn(ws, "bs_28")
    outEasy = EnsureOutputColumn(ws, "bs_easy")
    outHard = EnsureOutputColumn(ws, "bs_hard")

    ' Data extent is taken from column A.
    Dim lastRow As Long
    lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    Dim r As Long
    For r = 2 To lastRow
        ws.Cells(r, outAll).Value = MeanOfRow(ws, r, colsAll)
        ws.Cells(r, outEasy).Value = MeanOfRow(ws, r, colsEasy)
        ws.Cells(r, outHard).Value = MeanOfRow(ws, r, colsHard)
    Next r
End Sub

View File

@ -0,0 +1,25 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

# Column groups feeding the two composite scores.
npast_cols <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")
nfut_cols <- c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Row-wise means across the four domain scores, ignoring missing values.
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, npast_cols], na.rm = TRUE)
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, nfut_cols], na.rm = TRUE)

# NOTE(review): this overwrites exp1.csv in place — keep a backup of the raw file.
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals.
# print() is explicit so the summaries also appear when this file is run via
# source(), where top-level values are not auto-printed.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first rows to verify the calculations. head() is safe when the
# file has fewer than 5 rows, unlike exp1_data[1:5, ].
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c(npast_cols, "NPast_mean_total", nfut_cols, "NFut_mean_total")], 5))

View File

@ -0,0 +1,17 @@
,eohiDGEN_mean,ehi_global_mean,sex_dummy,demo_age_1,edu_num,AOT_total,CRT_correct,CRT_int,bs_28,bs_easy,bs_hard,cal_selfActual
ehi_global_mean,0.31***,,,,,,,,,,,
sex_dummy,0.0086,0.044,,,,,,,,,,
demo_age_1,-0.12***,-0.19***,0.0094,,,,,,,,,
edu_num,-0.0081,0.009,-0.062*,-0.023,,,,,,,,
AOT_total,0.11***,0.086***,-0.042,0.11,0.035,,,,,,,
CRT_correct,0.068*,0.051,-0.15***,-0.049,0.13***,0.26***,,,,,,
CRT_int,-0.058,-0.057,0.16***,0.071*,-0.12***,-0.21***,-0.87***,,,,,
bs_28,-0.01,-0.006803247,-0.0022,-0.071*,-0.065*,-0.27***,-0.25***,0.23***,,,,
bs_easy,-0.0088,0.014,-0.037,-0.30***,-0.009,-0.31***,-0.094***,0.047,0.61***,,,
bs_hard,-0.015,-0.024,0.022,0.095**,-0.085**,-0.15***,-0.24***,0.25***,0.87***,0.18***,,
cal_selfActual,0.015,-0.051,-0.15***,0.18***,0.12***,-0.041,0.047,-0.053,0.31***,0.05315529,0.35***,
cal_global,0.019,-0.0047,-0.078*,0.031,0.021,-0.16***,-0.15***,0.13***,0.76***,0.37***,0.72***,0.55***
,,,,,,,,,,,,
*p<0.05,,,,,,,,,,,,
**p<0.01,,,,,,,,,,,,
***p<0.001,,,,,,,,,,,,
1 eohiDGEN_mean ehi_global_mean sex_dummy demo_age_1 edu_num AOT_total CRT_correct CRT_int bs_28 bs_easy bs_hard cal_selfActual
1 eohiDGEN_mean ehi_global_mean sex_dummy demo_age_1 edu_num AOT_total CRT_correct CRT_int bs_28 bs_easy bs_hard cal_selfActual
2 ehi_global_mean 0.31***
3 sex_dummy 0.0086 0.044
4 demo_age_1 -0.12*** -0.19*** 0.0094
5 edu_num -0.0081 0.009 -0.062* -0.023
6 AOT_total 0.11*** 0.086*** -0.042 0.11 0.035
7 CRT_correct 0.068* 0.051 -0.15*** -0.049 0.13*** 0.26***
8 CRT_int -0.058 -0.057 0.16*** 0.071* -0.12*** -0.21*** -0.87***
9 bs_28 -0.01 -0.006803247 -0.0022 -0.071* -0.065* -0.27*** -0.25*** 0.23***
10 bs_easy -0.0088 0.014 -0.037 -0.30*** -0.009 -0.31*** -0.094*** 0.047 0.61***
11 bs_hard -0.015 -0.024 0.022 0.095** -0.085** -0.15*** -0.24*** 0.25*** 0.87*** 0.18***
12 cal_selfActual 0.015 -0.051 -0.15*** 0.18*** 0.12*** -0.041 0.047 -0.053 0.31*** 0.05315529 0.35***
13 cal_global 0.019 -0.0047 -0.078* 0.031 0.021 -0.16*** -0.15*** 0.13*** 0.76*** 0.37*** 0.72*** 0.55***
14
15 *p<0.05
16 **p<0.01
17 ***p<0.001

View File

@ -0,0 +1,18 @@
,eohiDGEN_mean,ehi_global_mean,sex_dummy,demo_age_1,edu_num,AOT_total,CRT_correct,CRT_int,bs_28,bs_easy,bs_hard,cal_selfActual
ehi_global_mean,3.36865E-25,,,,,,,,,,,
sex_dummy,0.778486413,0.154074007,,,,,,,,,,
demo_age_1,5.04502E-05,1.68576E-10,0.759436702,,,,,,,,,
edu_num,0.79326152,0.768960745,0.043945088,0.454222403,,,,,,,,
AOT_total,0.000229114,0.004867793,0.168503786,0.000314432,0.259313829,,,,,,,
CRT_correct,0.027291283,0.099344285,4.65922E-07,0.108342391,3.46509E-05,4.3812E-18,,,,,,
CRT_int,0.05842806,0.065472289,1.53701E-07,0.021280157,6.99182E-05,3.99893E-12,0,,,,,
bs_28,0.735913365,0.824825013,0.94333124,0.020232959,0.035132191,1.09048E-18,1.61283E-16,1.17159E-13,,,,
bs_easy,0.775943639,0.653590097,0.22951034,3.86076E-23,0.77008947,1.15661E-24,0.002210577,0.123278134,6.2215E-108,,,
bs_hard,0.622307404,0.443340859,0.469287124,0.001914319,0.005383608,7.67838E-07,9.71752E-16,2.00781E-16,0,2.79128E-09,,
cal_selfActual,0.623530057,0.098794041,8.23097E-07,2.62161E-09,0.000133768,0.183121705,0.125377421,0.088656149,2.63226E-25,0.083521419,5.04214E-32,
cal_global,0.543555525,0.879493859,0.011276459,0.318757547,0.487961367,3.0437E-07,8.00711E-07,1.20286E-05,1.4315E-202,3.1393E-35,1.2757E-171,4.92187E-86
,,,,,,,,,,,,
,,,,,,,,,,,,
,p<0.05,,,,,,,,,,,
,p<0.01,,,,,,,,,,,
,p<0.001,,,,,,,,,,,
1 eohiDGEN_mean ehi_global_mean sex_dummy demo_age_1 edu_num AOT_total CRT_correct CRT_int bs_28 bs_easy bs_hard cal_selfActual
1 eohiDGEN_mean ehi_global_mean sex_dummy demo_age_1 edu_num AOT_total CRT_correct CRT_int bs_28 bs_easy bs_hard cal_selfActual
2 ehi_global_mean 3.36865E-25
3 sex_dummy 0.778486413 0.154074007
4 demo_age_1 5.04502E-05 1.68576E-10 0.759436702
5 edu_num 0.79326152 0.768960745 0.043945088 0.454222403
6 AOT_total 0.000229114 0.004867793 0.168503786 0.000314432 0.259313829
7 CRT_correct 0.027291283 0.099344285 4.65922E-07 0.108342391 3.46509E-05 4.3812E-18
8 CRT_int 0.05842806 0.065472289 1.53701E-07 0.021280157 6.99182E-05 3.99893E-12 0
9 bs_28 0.735913365 0.824825013 0.94333124 0.020232959 0.035132191 1.09048E-18 1.61283E-16 1.17159E-13
10 bs_easy 0.775943639 0.653590097 0.22951034 3.86076E-23 0.77008947 1.15661E-24 0.002210577 0.123278134 6.2215E-108
11 bs_hard 0.622307404 0.443340859 0.469287124 0.001914319 0.005383608 7.67838E-07 9.71752E-16 2.00781E-16 0 2.79128E-09
12 cal_selfActual 0.623530057 0.098794041 8.23097E-07 2.62161E-09 0.000133768 0.183121705 0.125377421 0.088656149 2.63226E-25 0.083521419 5.04214E-32
13 cal_global 0.543555525 0.879493859 0.011276459 0.318757547 0.487961367 3.0437E-07 8.00711E-07 1.20286E-05 1.4315E-202 3.1393E-35 1.2757E-171 4.92187E-86
14
15
16 p<0.05
17 p<0.01
18 p<0.001

View File

@ -0,0 +1,65 @@
<html><head><title>EHI Reliability Analysis</title></head><body><h1>EHI Reliability Analysis</h1><h2>Two-Item Reliability Summary</h2><p>Pearson r: 0.34049 (95% CI: [0.28622, 0.39258])</p><p>Spearman r: 0.31052 (95% CI: [0.25516, 0.36386])</p><p>Spearman–Brown / Cronbach's α: 0.50801 (95% CI: [0.44506, 0.56382])</p><h2>Cronbach's Alpha</h2><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> raw_alpha </th>
<th style="text-align:right;"> std.alpha </th>
<th style="text-align:right;"> G6(smc) </th>
<th style="text-align:right;"> average_r </th>
<th style="text-align:right;"> S/N </th>
<th style="text-align:right;"> ase </th>
<th style="text-align:right;"> mean </th>
<th style="text-align:right;"> sd </th>
<th style="text-align:right;"> median_r </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> </td>
<td style="text-align:right;"> 0.3078741 </td>
<td style="text-align:right;"> 0.5080098 </td>
<td style="text-align:right;"> 0.3404914 </td>
<td style="text-align:right;"> 0.3404914 </td>
<td style="text-align:right;"> 1.032561 </td>
<td style="text-align:right;"> 0.0216938 </td>
<td style="text-align:right;"> 0.2755268 </td>
<td style="text-align:right;"> 0.98504 </td>
<td style="text-align:right;"> 0.3404914 </td>
</tr>
</tbody>
</table><h2>Split-Half Reliability</h2><p>Maximum split half reliability: 0.50801</p><h2>Item-Level Statistics</h2><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> n </th>
<th style="text-align:right;"> raw.r </th>
<th style="text-align:right;"> std.r </th>
<th style="text-align:right;"> r.cor </th>
<th style="text-align:right;"> r.drop </th>
<th style="text-align:right;"> mean </th>
<th style="text-align:right;"> sd </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> eohiDGEN_mean </td>
<td style="text-align:right;"> 1063 </td>
<td style="text-align:right;"> 0.9706327 </td>
<td style="text-align:right;"> 0.8186853 </td>
<td style="text-align:right;"> 0.4777163 </td>
<td style="text-align:right;"> 0.3404914 </td>
<td style="text-align:right;"> 0.4148824 </td>
<td style="text-align:right;"> 1.740598 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi_global_mean </td>
<td style="text-align:right;"> 1063 </td>
<td style="text-align:right;"> 0.5566839 </td>
<td style="text-align:right;"> 0.8186853 </td>
<td style="text-align:right;"> 0.4777163 </td>
<td style="text-align:right;"> 0.3404914 </td>
<td style="text-align:right;"> 0.1361712 </td>
<td style="text-align:right;"> 0.504053 </td>
</tr>
</tbody>
</table></body></html>

BIN
eohi1/Rplots.pdf Normal file

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

After

Width:  |  Height:  |  Size: 17 KiB

BIN
eohi1/age_DGEN_plot.png Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 444 KiB

After

Width:  |  Height:  |  Size: 444 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

After

Width:  |  Height:  |  Size: 18 KiB

BIN
eohi1/age_domain_plot.png Normal file

Binary file not shown.

Before

Width:  |  Height:  |  Size: 530 KiB

After

Width:  |  Height:  |  Size: 530 KiB

View File

@ -0,0 +1,164 @@
# Assumption Checks Before Cronbach's Alpha Analysis
# Run this BEFORE the main reliability analysis
library(psych)
library(corrplot)
library(ggplot2)
# Read the data (exp1.csv must be in the current working directory)
data <- read.csv("exp1.csv")
# Define scale variables: each "past" subscale below lists its five item columns.
# Preferences subscale
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
"NPastDiff_pref_nap", "NPastDiff_pref_travel")
# Personality subscale
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
"NPastDiff_pers_anxious", "NPastDiff_pers_complex")
# Values subscale
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
"NPastDiff_val_performance", "NPastDiff_val_justice")
# Life-satisfaction subscale
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
"NPastDiff_life_important", "NPastDiff_life_change")
# Function to check assumptions for a scale
# Pre-analysis assumption checks for Cronbach's alpha on one scale.
#
# Args:
#   data       data frame holding the raw item responses
#   var_names  character vector of item column names forming the scale
#   scale_name label used in the printed report
#
# Prints a numbered report (sample size, missing data, normality,
# inter-item correlations, item variances, multivariate outliers, and an
# overall recommendation). Returns FALSE when fewer than 30 complete cases
# are available; TRUE otherwise.
check_assumptions <- function(data, var_names, scale_name) {
  # BUG FIX: the original used "="*60, which is not valid R (there is no
  # string-repetition operator) and errored on every call. strrep() is the
  # correct base-R way to build the divider line.
  divider <- strrep("=", 60)
  cat("\n", divider, "\n")
  cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
  cat(divider, "\n")
  # Get scale data
  scale_data <- data[, var_names]
  # 1. Sample size check
  complete_cases <- sum(complete.cases(scale_data))
  cat("1. SAMPLE SIZE CHECK:\n")
  cat(" Total participants:", nrow(data), "\n")
  cat(" Complete cases:", complete_cases, "\n")
  cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
  if(complete_cases < 30) {
    cat(" WARNING: Sample size too small for reliable alpha estimates\n")
    return(FALSE)
  }
  # 2. Missing data check
  cat("\n2. MISSING DATA CHECK:\n")
  missing_counts <- colSums(is.na(scale_data))
  missing_pct <- round(missing_counts / nrow(data) * 100, 2)
  cat(" Missing data by item:\n")
  for(i in 1:length(var_names)) {
    cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
  }
  max_missing <- max(missing_pct)
  cat(" Maximum missing:", max_missing, "%\n")
  cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
  # 3. Use only complete cases for remaining checks
  complete_data <- scale_data[complete.cases(scale_data), ]
  # 4. Normality check (Shapiro-Wilk test on first item as example)
  cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
  if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
    shapiro_result <- shapiro.test(complete_data[, 1])
    cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
    cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
  } else {
    cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
  }
  # 5. Inter-item correlations check
  cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
  cor_matrix <- cor(complete_data)
  # Keep each pairwise correlation once: blank the lower triangle and diagonal.
  cor_matrix[lower.tri(cor_matrix)] <- NA
  diag(cor_matrix) <- NA
  cors <- as.vector(cor_matrix)
  cors <- cors[!is.na(cors)]
  positive_cors <- sum(cors > 0)
  strong_cors <- sum(cors > 0.30)
  negative_cors <- sum(cors < 0)
  cat(" Total correlations:", length(cors), "\n")
  cat(" Positive correlations:", positive_cors, "\n")
  cat(" Strong correlations (>0.30):", strong_cors, "\n")
  cat(" Negative correlations:", negative_cors, "\n")
  cat(" Mean correlation:", round(mean(cors), 4), "\n")
  cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
  if(negative_cors > 0) {
    cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
  }
  if(strong_cors / length(cors) < 0.5) {
    cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
  }
  # 6. Item variance check
  cat("\n5. ITEM VARIANCE CHECK:\n")
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat(" Item variances:", round(item_vars, 4), "\n")
  cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
  cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
  # 7. Outlier check
  cat("\n6. OUTLIER CHECK:\n")
  # Check for multivariate outliers using Mahalanobis distance
  # (needs more rows than columns for an invertible covariance matrix)
  if(nrow(complete_data) > ncol(complete_data)) {
    mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
    outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
    outliers <- sum(mahal_dist > outlier_threshold)
    cat(" Multivariate outliers (p<0.001):", outliers, "\n")
    cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
  }
  # 8. Summary recommendation: count how many hard criteria were violated
  cat("\n7. OVERALL RECOMMENDATION:\n")
  issues <- 0
  if(complete_cases < 30) issues <- issues + 1
  if(max_missing >= 20) issues <- issues + 1
  if(negative_cors > 0) issues <- issues + 1
  if(var_ratio >= 4) issues <- issues + 1
  if(issues == 0) {
    cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
  } else if(issues <= 2) {
    cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
  } else {
    cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
  }
  return(TRUE)
}
# Run the checks on every "past" scale and one "future" scale.
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
cat("\nData: exp1.csv")
cat("\nTotal sample size:", nrow(data))
check_assumptions(data, past_pref_vars, "Past Preferences")
check_assumptions(data, past_pers_vars, "Past Personality")
check_assumptions(data, past_val_vars, "Past Values")
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
# Quick check of future scales (you can expand this)
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
"NFutDiff_pref_nap", "NFutDiff_pref_travel")
check_assumptions(data, fut_pref_vars, "Future Preferences")
# BUG FIX: "="*60 is not valid R (no string-repetition operator); strrep()
# builds the divider line instead.
divider <- strrep("=", 60)
cat("\n", divider, "\n")
cat("GENERAL GUIDELINES:\n")
cat(divider, "\n")
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
cat("⚠️ If some assumptions are violated, interpret with caution\n")
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
cat(" - Omega coefficient (more robust to violations)\n")
cat(" - Split-half reliability\n")
cat(" - Test-retest reliability\n")
cat(" - Factor analysis to check dimensionality\n")

141
eohi1/brierVARS.vb Normal file
View File

@ -0,0 +1,141 @@
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Return the 1-based column number of headerName in row 1 of ws, or 0 when
    ' the header is not found.
    ' Application.Match returns an error Variant rather than raising, so the
    ' original On Error Resume Next guard was redundant and has been removed —
    ' this also matches GetColIndex in BS_means.vb.
    Dim lastCol As Long
    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    Dim m As Variant
    m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
    If IsError(m) Then
        GetCol = 0
    Else
        GetCol = CLng(m)
    End If
End Function
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    ' Map a cell value to True/False; Null when the value is indeterminate
    ' (error cells, empty cells, or unrecognised text).
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        Dim token As String
        token = Trim$(UCase$(CStr(v)))
        If token = "TRUE" Or token = "T" Or token = "1" Or token = "YES" Or token = "Y" Then
            NormalizeTruth = True
        ElseIf token = "FALSE" Or token = "F" Or token = "0" Or token = "NO" Or token = "N" Then
            NormalizeTruth = False
        Else
            NormalizeTruth = Null
        End If
    End If
End Function
Private Function NormalizeProb01(ByVal v As Variant) As Double
    ' Coerce a confidence entry to a probability in [0,1]; returns -1 when the
    ' value is unusable (error, empty, or non-numeric).
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If
    ' Percent-formatted text such as "75%" is divided by 100 directly.
    Dim txt As String
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        Dim stripped As String
        stripped = Replace$(txt, "%", "")
        If IsNumeric(stripped) Then
            NormalizeProb01 = CDbl(stripped) / 100#
            Exit Function
        End If
    End If
    ' Plain numbers above 1 are assumed to be percentages (e.g. 55 -> 0.55).
    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
    ElseIf CDbl(v) > 1# Then
        NormalizeProb01 = CDbl(v) / 100#
    Else
        NormalizeProb01 = CDbl(v)
    End If
End Function
Private Sub ApplyBrierScores(ByVal ws As Worksheet, ByVal pairVars As Variant, ByVal targetHeaders As Variant, ByVal rowCount As Long, ByVal statementIsTrue As Boolean)
    ' For each (answer, confidence) column pair in pairVars, write per-row
    ' Brier scores into the target column whose header shares the same item
    ' prefix (the token before the first "_").
    ' The reported probability that the statement is true is conf when the
    ' participant answered True, otherwise 1 - conf; the Brier score is the
    ' squared deviation of that probability from the actual outcome (1 for a
    ' true statement, 0 for a false one).
    Dim outcome As Double
    outcome = IIf(statementIsTrue, 1#, 0#)
    Dim i As Long
    For i = 0 To UBound(pairVars) Step 2
        Dim srcVar As String
        srcVar = pairVars(i)
        Dim keyPrefix As String
        keyPrefix = Left$(srcVar, InStr(srcVar, "_") - 1)
        Dim colAnswer As Long, colConf As Long
        colAnswer = GetCol(CStr(pairVars(i)), ws)
        colConf = GetCol(CStr(pairVars(i + 1)), ws)
        If colAnswer > 0 And colConf > 0 Then
            ' The target column depends only on the item prefix, so resolve it
            ' once per item instead of once per row (hoisted loop-invariant).
            Dim colTarget As Long
            colTarget = 0
            Dim j As Long
            For j = 0 To UBound(targetHeaders)
                If Left$(targetHeaders(j), InStr(targetHeaders(j), "_") - 1) = keyPrefix Then
                    colTarget = GetCol(CStr(targetHeaders(j)), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                Dim r As Long
                For r = 2 To rowCount
                    ' Read as Variant so error cells reach NormalizeTruth and
                    ' produce #N/A instead of a type-mismatch crash.
                    Dim answerVal As Variant
                    answerVal = ws.Cells(r, colAnswer).Value
                    Dim conf As Double
                    conf = NormalizeProb01(ws.Cells(r, colConf).Value)
                    Dim truth As Variant
                    truth = NormalizeTruth(answerVal)
                    Dim result As Variant
                    If IsNull(truth) Or conf < 0 Then
                        result = CVErr(xlErrNA)
                    Else
                        Dim pTrue As Double
                        pTrue = IIf(truth = True, conf, 1# - conf)
                        result = (pTrue - outcome) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub
Sub brierVARS()
    ' Compute Brier scores for every item on the first sheet.
    ' false_vars / true_vars alternate answer and confidence column headers for
    ' factually false and factually true statements; results go into the
    ' matching target_headers column. The two near-identical loops of the
    ' original are factored into ApplyBrierScores.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    ' Data extent is taken from column A; rows start at 2 (row 1 = headers).
    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ApplyBrierScores ws, false_vars, target_headers, rowCount, False
    ApplyBrierScores ws, true_vars, target_headers, rowCount, True
End Sub

57
eohi1/corr_bs_all.csv Normal file
View File

@ -0,0 +1,57 @@
group,var_x,var_y,n,r,p
BS_vs_Cal,bs_28,cal_global,1063,0.761895605,2.37E-202
BS_vs_Cal,bs_easy,cal_global,1063,0.367329651,2.67E-35
BS_vs_Cal,bs_hard,cal_global,1063,0.721638057,1.16E-171
BS_vs_Cal,bs_28,cal_selfActual,1063,0.312906942,1.41E-25
BS_vs_Cal,bs_easy,cal_selfActual,1063,0.054307733,0.07675131
BS_vs_Cal,bs_hard,cal_selfActual,1063,0.351630149,2.70E-32
BS_vs_EOHI,bs_28,ehi_global_mean,1063,-0.005868845,0.848429656
BS_vs_EOHI,bs_easy,ehi_global_mean,1063,0.014251763,0.642549738
BS_vs_EOHI,bs_hard,ehi_global_mean,1063,-0.022683413,0.460035927
Cal_vs_EOHI,cal_global,ehi_global_mean,1063,-0.004912877,0.872888653
Cal_vs_EOHI,cal_selfActual,ehi_global_mean,1063,-0.049727552,0.105147189
BS_vs_EOHI,bs_28,ehi_life_mean,1063,-0.007684801,0.802384957
BS_vs_EOHI,bs_easy,ehi_life_mean,1063,-0.024372981,0.427293321
BS_vs_EOHI,bs_hard,ehi_life_mean,1063,-0.001049006,0.972748582
Cal_vs_EOHI,cal_global,ehi_life_mean,1063,-0.002906394,0.924594545
Cal_vs_EOHI,cal_selfActual,ehi_life_mean,1063,0.028247567,0.357533744
BS_vs_EOHI,bs_28,ehi_pers_mean,1063,-0.00684928,0.823495332
BS_vs_EOHI,bs_easy,ehi_pers_mean,1063,-0.009992311,0.744870091
BS_vs_EOHI,bs_hard,ehi_pers_mean,1063,-0.013361509,0.663459912
Cal_vs_EOHI,cal_global,ehi_pers_mean,1063,-0.009160631,0.765455012
Cal_vs_EOHI,cal_selfActual,ehi_pers_mean,1063,-0.082807345,0.006907147
BS_vs_EOHI,bs_28,ehi_pref_mean,1063,-0.009153426,0.765634093
BS_vs_EOHI,bs_easy,ehi_pref_mean,1063,0.038767348,0.20660862
BS_vs_EOHI,bs_hard,ehi_pref_mean,1063,-0.033018506,0.282127883
Cal_vs_EOHI,cal_global,ehi_pref_mean,1063,0.009717642,0.751649131
Cal_vs_EOHI,cal_selfActual,ehi_pref_mean,1063,-0.044228709,0.149576975
BS_vs_EOHI,bs_28,ehi_val_mean,1063,0.011477853,0.708558862
BS_vs_EOHI,bs_easy,ehi_val_mean,1063,0.048673437,0.11273791
BS_vs_EOHI,bs_hard,ehi_val_mean,1063,-0.008316877,0.786509127
Cal_vs_EOHI,cal_global,ehi_val_mean,1063,-0.024447239,0.425886059
Cal_vs_EOHI,cal_selfActual,ehi_val_mean,1063,-0.099629152,0.001143742
BS_vs_EOHI,bs_28,eohiDGEN_life,1063,-0.011893558,0.69851102
BS_vs_EOHI,bs_easy,eohiDGEN_life,1063,-0.087676604,0.004226705
BS_vs_EOHI,bs_hard,eohiDGEN_life,1063,0.036722132,0.231590633
Cal_vs_EOHI,cal_global,eohiDGEN_life,1063,0.004140068,0.892751642
Cal_vs_EOHI,cal_selfActual,eohiDGEN_life,1063,0.036877484,0.229620994
BS_vs_EOHI,bs_28,eohiDGEN_mean,1063,-0.008492209,0.782120675
BS_vs_EOHI,bs_easy,eohiDGEN_mean,1063,-0.007458252,0.808095459
BS_vs_EOHI,bs_hard,eohiDGEN_mean,1063,-0.013570736,0.658521255
Cal_vs_EOHI,cal_global,eohiDGEN_mean,1063,0.019739833,0.520291178
Cal_vs_EOHI,cal_selfActual,eohiDGEN_mean,1063,0.016902147,0.582001734
BS_vs_EOHI,bs_28,eohiDGEN_pers,1063,-0.013831247,0.652392826
BS_vs_EOHI,bs_easy,eohiDGEN_pers,1063,-0.013771822,0.653788727
BS_vs_EOHI,bs_hard,eohiDGEN_pers,1063,-0.013837654,0.652242395
Cal_vs_EOHI,cal_global,eohiDGEN_pers,1063,-0.010393397,0.735006441
Cal_vs_EOHI,cal_selfActual,eohiDGEN_pers,1063,-0.028354225,0.355720599
BS_vs_EOHI,bs_28,eohiDGEN_pref,1063,0.002305478,0.940152072
BS_vs_EOHI,bs_easy,eohiDGEN_pref,1063,0.014268945,0.642148877
BS_vs_EOHI,bs_hard,eohiDGEN_pref,1063,-0.006108935,0.842308668
Cal_vs_EOHI,cal_global,eohiDGEN_pref,1063,0.024379777,0.427164421
Cal_vs_EOHI,cal_selfActual,eohiDGEN_pref,1063,0.008387657,0.784736723
BS_vs_EOHI,bs_28,eohiDGEN_val,1063,-0.043411125,0.157255062
BS_vs_EOHI,bs_easy,eohiDGEN_val,1063,-0.058087693,0.058325548
BS_vs_EOHI,bs_hard,eohiDGEN_val,1063,-0.032241181,0.293618158
Cal_vs_EOHI,cal_global,eohiDGEN_val,1063,-0.032038903,0.296658895
Cal_vs_EOHI,cal_selfActual,eohiDGEN_val,1063,0.035471252,0.247887232
1 group var_x var_y n r p
1 group var_x var_y n r p
2 BS_vs_Cal bs_28 cal_global 1063 0.761895605 2.37E-202
3 BS_vs_Cal bs_easy cal_global 1063 0.367329651 2.67E-35
4 BS_vs_Cal bs_hard cal_global 1063 0.721638057 1.16E-171
5 BS_vs_Cal bs_28 cal_selfActual 1063 0.312906942 1.41E-25
6 BS_vs_Cal bs_easy cal_selfActual 1063 0.054307733 0.07675131
7 BS_vs_Cal bs_hard cal_selfActual 1063 0.351630149 2.70E-32
8 BS_vs_EOHI bs_28 ehi_global_mean 1063 -0.005868845 0.848429656
9 BS_vs_EOHI bs_easy ehi_global_mean 1063 0.014251763 0.642549738
10 BS_vs_EOHI bs_hard ehi_global_mean 1063 -0.022683413 0.460035927
11 Cal_vs_EOHI cal_global ehi_global_mean 1063 -0.004912877 0.872888653
12 Cal_vs_EOHI cal_selfActual ehi_global_mean 1063 -0.049727552 0.105147189
13 BS_vs_EOHI bs_28 ehi_life_mean 1063 -0.007684801 0.802384957
14 BS_vs_EOHI bs_easy ehi_life_mean 1063 -0.024372981 0.427293321
15 BS_vs_EOHI bs_hard ehi_life_mean 1063 -0.001049006 0.972748582
16 Cal_vs_EOHI cal_global ehi_life_mean 1063 -0.002906394 0.924594545
17 Cal_vs_EOHI cal_selfActual ehi_life_mean 1063 0.028247567 0.357533744
18 BS_vs_EOHI bs_28 ehi_pers_mean 1063 -0.00684928 0.823495332
19 BS_vs_EOHI bs_easy ehi_pers_mean 1063 -0.009992311 0.744870091
20 BS_vs_EOHI bs_hard ehi_pers_mean 1063 -0.013361509 0.663459912
21 Cal_vs_EOHI cal_global ehi_pers_mean 1063 -0.009160631 0.765455012
22 Cal_vs_EOHI cal_selfActual ehi_pers_mean 1063 -0.082807345 0.006907147
23 BS_vs_EOHI bs_28 ehi_pref_mean 1063 -0.009153426 0.765634093
24 BS_vs_EOHI bs_easy ehi_pref_mean 1063 0.038767348 0.20660862
25 BS_vs_EOHI bs_hard ehi_pref_mean 1063 -0.033018506 0.282127883
26 Cal_vs_EOHI cal_global ehi_pref_mean 1063 0.009717642 0.751649131
27 Cal_vs_EOHI cal_selfActual ehi_pref_mean 1063 -0.044228709 0.149576975
28 BS_vs_EOHI bs_28 ehi_val_mean 1063 0.011477853 0.708558862
29 BS_vs_EOHI bs_easy ehi_val_mean 1063 0.048673437 0.11273791
30 BS_vs_EOHI bs_hard ehi_val_mean 1063 -0.008316877 0.786509127
31 Cal_vs_EOHI cal_global ehi_val_mean 1063 -0.024447239 0.425886059
32 Cal_vs_EOHI cal_selfActual ehi_val_mean 1063 -0.099629152 0.001143742
33 BS_vs_EOHI bs_28 eohiDGEN_life 1063 -0.011893558 0.69851102
34 BS_vs_EOHI bs_easy eohiDGEN_life 1063 -0.087676604 0.004226705
35 BS_vs_EOHI bs_hard eohiDGEN_life 1063 0.036722132 0.231590633
36 Cal_vs_EOHI cal_global eohiDGEN_life 1063 0.004140068 0.892751642
37 Cal_vs_EOHI cal_selfActual eohiDGEN_life 1063 0.036877484 0.229620994
38 BS_vs_EOHI bs_28 eohiDGEN_mean 1063 -0.008492209 0.782120675
39 BS_vs_EOHI bs_easy eohiDGEN_mean 1063 -0.007458252 0.808095459
40 BS_vs_EOHI bs_hard eohiDGEN_mean 1063 -0.013570736 0.658521255
41 Cal_vs_EOHI cal_global eohiDGEN_mean 1063 0.019739833 0.520291178
42 Cal_vs_EOHI cal_selfActual eohiDGEN_mean 1063 0.016902147 0.582001734
43 BS_vs_EOHI bs_28 eohiDGEN_pers 1063 -0.013831247 0.652392826
44 BS_vs_EOHI bs_easy eohiDGEN_pers 1063 -0.013771822 0.653788727
45 BS_vs_EOHI bs_hard eohiDGEN_pers 1063 -0.013837654 0.652242395
46 Cal_vs_EOHI cal_global eohiDGEN_pers 1063 -0.010393397 0.735006441
47 Cal_vs_EOHI cal_selfActual eohiDGEN_pers 1063 -0.028354225 0.355720599
48 BS_vs_EOHI bs_28 eohiDGEN_pref 1063 0.002305478 0.940152072
49 BS_vs_EOHI bs_easy eohiDGEN_pref 1063 0.014268945 0.642148877
50 BS_vs_EOHI bs_hard eohiDGEN_pref 1063 -0.006108935 0.842308668
51 Cal_vs_EOHI cal_global eohiDGEN_pref 1063 0.024379777 0.427164421
52 Cal_vs_EOHI cal_selfActual eohiDGEN_pref 1063 0.008387657 0.784736723
53 BS_vs_EOHI bs_28 eohiDGEN_val 1063 -0.043411125 0.157255062
54 BS_vs_EOHI bs_easy eohiDGEN_val 1063 -0.058087693 0.058325548
55 BS_vs_EOHI bs_hard eohiDGEN_val 1063 -0.032241181 0.293618158
56 Cal_vs_EOHI cal_global eohiDGEN_val 1063 -0.032038903 0.296658895
57 Cal_vs_EOHI cal_selfActual eohiDGEN_val 1063 0.035471252 0.247887232

103
eohi1/correlation matrix.r Normal file
View File

@ -0,0 +1,103 @@
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
data <- df %>%
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
filter(demo_sex != "Prefer not to say")
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))
####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
# Print correlation matrix
print(round(cor_matrix, 3))
# Get significance tests for correlations using psych package
library(psych)
# Create correlation matrix with significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# Print correlation matrix
print(round(cor_test$r, 3))
# Print p-values
print(round(cor_test$p, 3))
# Print all correlations with r and p values (for reporting)
for(i in 1:nrow(cor_test$r)) {
for(j in 1:ncol(cor_test$r)) {
if(i != j) { # Skip diagonal
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
": r =", round(cor_test$r[i, j], 3),
", p =", round(cor_test$p[i, j], 3), "\n")
}
}
}
# Also print significant correlations summary
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
if(nrow(sig_cors) > 0) {
for(i in 1:nrow(sig_cors)) {
row_idx <- sig_cors[i, 1]
col_idx <- sig_cors[i, 2]
if(row_idx != col_idx) { # Skip diagonal
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
": r =", round(cor_test$r[row_idx, col_idx], 3),
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
}
}
}
# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)

View File

@ -0,0 +1,31 @@
Variable1,Variable2,Spearman_r,P_value
ehi_global_mean,AOT_total,0.084899905,0.005609163
ehi_life_mean,AOT_total,0.072949943,0.017368706
ehi_pref_mean,CRT_correct,0.068917794,0.024639893
ehi_pref_mean,CRT_int,-0.064728497,0.034848248
eohiDGEN_life,AOT_total,0.117946108,0.000116007
eohiDGEN_mean,AOT_total,0.111022973,0.000287049
eohiDGEN_mean,CRT_correct,0.066645932,0.029798224
eohiDGEN_pers,AOT_total,0.091596281,0.002797567
eohiDGEN_pref,AOT_total,0.068355715,0.025838342
eohiDGEN_val,AOT_total,0.103955004,0.000687468
eohiDGEN_val,CRT_correct,0.074789341,0.014729792
eohiDGEN_val,CRT_int,-0.06044435,0.048816455
eohiDGEN_pref,CRT_int,-0.058185337,0.057903079
ehi_global_mean,CRT_int,-0.057457963,0.061112
eohiDGEN_mean,CRT_int,-0.057307383,0.061794359
eohiDGEN_pref,CRT_correct,0.053610168,0.080621938
ehi_global_mean,CRT_correct,0.051329314,0.094394596
ehi_pers_mean,CRT_correct,0.051328061,0.094402649
ehi_pref_mean,AOT_total,0.05039887,0.100528033
ehi_pers_mean,CRT_int,-0.049058279,0.109918146
eohiDGEN_life,CRT_correct,0.039494214,0.198218491
ehi_val_mean,CRT_int,-0.037910532,0.216825816
ehi_pers_mean,AOT_total,0.031316388,0.307691267
eohiDGEN_pers,CRT_correct,0.025658263,0.403319679
eohiDGEN_pers,CRT_int,-0.017319242,0.572720797
ehi_life_mean,CRT_correct,0.016392301,0.593440889
ehi_val_mean,AOT_total,0.015193625,0.620731537
ehi_val_mean,CRT_correct,0.00599121,0.845308845
ehi_life_mean,CRT_int,-0.005811627,0.849889753
eohiDGEN_life,CRT_int,-0.001303724,0.966135022
1 Variable1 Variable2 Spearman_r P_value
1 Variable1 Variable2 Spearman_r P_value
2 ehi_global_mean AOT_total 0.084899905 0.005609163
3 ehi_life_mean AOT_total 0.072949943 0.017368706
4 ehi_pref_mean CRT_correct 0.068917794 0.024639893
5 ehi_pref_mean CRT_int -0.064728497 0.034848248
6 eohiDGEN_life AOT_total 0.117946108 0.000116007
7 eohiDGEN_mean AOT_total 0.111022973 0.000287049
8 eohiDGEN_mean CRT_correct 0.066645932 0.029798224
9 eohiDGEN_pers AOT_total 0.091596281 0.002797567
10 eohiDGEN_pref AOT_total 0.068355715 0.025838342
11 eohiDGEN_val AOT_total 0.103955004 0.000687468
12 eohiDGEN_val CRT_correct 0.074789341 0.014729792
13 eohiDGEN_val CRT_int -0.06044435 0.048816455
14 eohiDGEN_pref CRT_int -0.058185337 0.057903079
15 ehi_global_mean CRT_int -0.057457963 0.061112
16 eohiDGEN_mean CRT_int -0.057307383 0.061794359
17 eohiDGEN_pref CRT_correct 0.053610168 0.080621938
18 ehi_global_mean CRT_correct 0.051329314 0.094394596
19 ehi_pers_mean CRT_correct 0.051328061 0.094402649
20 ehi_pref_mean AOT_total 0.05039887 0.100528033
21 ehi_pers_mean CRT_int -0.049058279 0.109918146
22 eohiDGEN_life CRT_correct 0.039494214 0.198218491
23 ehi_val_mean CRT_int -0.037910532 0.216825816
24 ehi_pers_mean AOT_total 0.031316388 0.307691267
25 eohiDGEN_pers CRT_correct 0.025658263 0.403319679
26 eohiDGEN_pers CRT_int -0.017319242 0.572720797
27 ehi_life_mean CRT_correct 0.016392301 0.593440889
28 ehi_val_mean AOT_total 0.015193625 0.620731537
29 ehi_val_mean CRT_correct 0.00599121 0.845308845
30 ehi_life_mean CRT_int -0.005811627 0.849889753
31 eohiDGEN_life CRT_int -0.001303724 0.966135022

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,81 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Keep only required columns for the analysis
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Variables
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Compute pairwise correlations between every x/y variable combination,
# returned as one tidy row per pair: var_x, var_y, n, r, p, method.
# Uses pairwise-complete (finite) observations for each pair; pairs with
# fewer than 3 usable cases get NA estimates instead of calling cor.test().
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  pair_grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  out <- purrr::pmap_dfr(pair_grid, function(x, y) {
    a <- df[[x]]
    b <- df[[y]]
    usable <- is.finite(a) & is.finite(b)
    n_usable <- sum(usable)
    if (n_usable < 3) {
      # Too few cases for a correlation test: report the pair with NA stats.
      tibble::tibble(var_x = x, var_y = y, n = n_usable,
                     r = NA_real_, p = NA_real_, method = method)
    } else {
      fit <- suppressWarnings(cor.test(a[usable], b[usable], method = method))
      tibble::tibble(var_x = x, var_y = y, n = n_usable,
                     r = unname(fit$estimate), p = fit$p.value, method = method)
    }
  })
  dplyr::arrange(out, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
# Wide r-only tables (optional)
# Reshape a tidy correlation table to wide form: one row per var_x and
# one column of r values per var_y.
to_wide <- function(d) {
  wide_tbl <- tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
  wide_tbl
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)
# Export a single CSV combining all sets
corr_bs_eohi$group <- "BS_vs_EOHI"
corr_bs_cal$group <- "BS_vs_Cal"
corr_cal_eohi$group <- "Cal_vs_EOHI"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,303 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Compute a tidy table of descriptive statistics for the given columns of
# `data`: n, mean, sd, min, max, median, and the 25th/75th percentiles.
# Returns one row per variable with one column per statistic; all statistics
# except n are rounded to 5 decimal places.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
# across() produces one column per variable x statistic, named "<var>_<stat>".
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
# Long format: one row per "<var>_<stat>" name.
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
# Split on the LAST underscore only (lookahead regex), so variable names
# that themselves contain underscores (e.g. "eohiDGEN_pref") stay intact.
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
# Check for bimodal or unusual distributions
hist(df$eohi_pref)
hist(df$cal_selfActual)
# Look for extreme values
# boxplot(df$eohi_pref)
# boxplot(df$cal_selfActual)
# Test normality for each variable - probably unnecessary
library(nortest)
# Test EOHI variables
for(var in eohi_vars) {
cat("\n", var, "normality test:\n")
print(shapiro.test(df_complete[[var]]))
}
# Test calibration variables
for(var in cal_vars) {
cat("\n", var, "normality test:\n")
print(shapiro.test(df_complete[[var]]))
}
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Render a correlation matrix as strings with significance stars appended:
# *** p < .001, ** p < .01, * p < .05; values rounded to 5 decimal places.
# NA p-values (e.g. the diagonal returned by rcorr) produce a literal "NA"
# suffix via paste0, matching the nested-ifelse formulation.
corstars <- function(cor_mat, p_mat) {
  star_labels <- c("***", "**", "*", "")
  # findInterval maps each p-value into its significance band; NA stays NA.
  stars <- star_labels[findInterval(p_mat, c(0, 0.001, 0.01, 0.05))]
  # format() pads the rounded values to a common width before stars are added.
  cells <- paste0(format(round(cor_mat, 5), nsmall = 5), stars)
  matrix(cells, nrow = nrow(cor_mat),
         dimnames = list(rownames(cor_mat), colnames(cor_mat)))
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Correlation between two columns with a bootstrap percentile 95% CI.
#
# Args:
#   data:       data frame containing var1 and var2
#   var1, var2: column names (strings)
#   method:     correlation method passed to cor() ("pearson" or "spearman")
#   R:          number of bootstrap resamples
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
# Pairs with fewer than 3 complete cases return all-NA estimates.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Pairwise-complete cases for this variable pair only.
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  if (nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each bootstrap resample.
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)
  # boot.ci() returns NULL (with a warning) when the bootstrap replicates are
  # degenerate (e.g. zero variance); the previous code then errored on ci$perc,
  # aborting the whole pmap run. Guard so such pairs report NA bounds instead.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if (is.null(ci) || is.null(ci$perc)) {
    ci_lower <- NA_real_
    ci_upper <- NA_real_
  } else {
    ci_lower <- ci$perc[4]
    ci_upper <- ci$perc[5]
  }
  data.frame(
    correlation = boot_results$t0,
    ci_lower = ci_lower,
    ci_upper = ci_upper,
    n = nrow(complete_data)
  )
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")

View File

@ -0,0 +1,171 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load required libraries
library(corrplot)
library(Hmisc)
library(psych)
# Load the data
exp1_data <- read.csv("ehi1.csv")
# Define the two sets of variables
set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean")
set2_vars <- c("AOT_total", "CRT_correct", "CRT_int")
# Create subset with only the variables of interest
correlation_data <- exp1_data[, c(set1_vars, set2_vars)]
# ===== NORMALITY CHECKS =====
# Shapiro-Wilk tests for normality
for(var in names(correlation_data)) {
shapiro_result <- shapiro.test(correlation_data[[var]])
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
var, shapiro_result$p.value,
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
}
# Visual normality checks
pdf("normality_plots.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))
for(var in names(correlation_data)) {
# Histogram with normal curve overlay
hist(correlation_data[[var]], main = paste("Histogram:", var),
xlab = var, freq = FALSE)
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
sd = sd(correlation_data[[var]], na.rm = TRUE)),
add = TRUE, col = "red", lwd = 2)
# Q-Q plot
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
qqline(correlation_data[[var]], col = "red", lwd = 2)
}
dev.off()
# ===== LINEARITY CHECKS =====
# Check linearity between variable pairs
pdf("linearity_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
# Scatter plot with regression line
plot(correlation_data[[var1]], correlation_data[[var2]],
main = paste(var1, "vs", var2),
xlab = var1, ylab = var2, pch = 16, cex = 0.6)
# Add linear regression line
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
abline(lm_fit, col = "red", lwd = 2)
# Add LOESS smooth line for non-linear pattern detection
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
loess_pred <- predict(loess_fit, x_seq)
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
# Calculate R-squared for linear fit
r_squared <- summary(lm_fit)$r.squared
cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
}
}
dev.off()
# Residual analysis for linearity
pdf("residual_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
residuals <- residuals(lm_fit)
fitted <- fitted(lm_fit)
plot(fitted, residuals,
main = paste("Residuals:", var1, "vs", var2),
xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
abline(h = 0, col = "red", lwd = 2)
# Add smooth line to residuals
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
}
}
dev.off()
# Calculate correlation matrix (Spearman only)
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
# Print correlation matrix with 5 decimal places
print(round(cor_matrix_spearman, 5))
# Separate correlations between the two sets (Spearman)
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))
# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
# Statistical significance tests (Spearman)
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
p_val <- cor_test_results_spearman$P[var1, var2]
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
}
}
# Create correlation plot (Spearman only)
pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8)
corrplot(cor_matrix_spearman, method = "color", type = "upper",
order = "hclust", tl.cex = 0.8, tl.col = "black",
addCoef.col = "black", number.cex = 0.7,
title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures")
dev.off()
# Summary statistics
desc_stats <- describe(correlation_data)
print(round(desc_stats, 5))
# Save results to CSV files
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv")
write.csv(round(desc_stats, 5), "descriptive_statistics.csv")
# Save correlation results in a formatted table
cor_results <- data.frame(
Variable1 = character(),
Variable2 = character(),
Spearman_r = numeric(),
P_value = numeric(),
stringsAsFactors = FALSE
)
# Extract significant correlations between sets
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
r_val <- cor_matrix_spearman[var1, var2]
p_val <- cor_test_results_spearman$P[var1, var2]
cor_results <- rbind(cor_results, data.frame(
Variable1 = var1,
Variable2 = var2,
Spearman_r = r_val,
P_value = p_val,
stringsAsFactors = FALSE
))
}
}
write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE)

View File

@ -0,0 +1,45 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load the data
exp1_data <- read.csv("exp1.csv")
# Define all NPastDiff and NFutDiff variables
all_diff_vars <- c(
"NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
"NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
"NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
"NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change",
"NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel",
"NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex",
"NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice",
"NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change"
)
# Define DGEN variables to average
dgen_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
# Calculate domain_mean as average of all 40 variables
exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE)
# Calculate DGEN_mean as average of all 8 DGEN variables
exp1_data$DGEN_mean <- rowMeans(exp1_data[, dgen_vars], na.rm = TRUE)
# Save the updated data
write.csv(exp1_data, "exp1.csv", row.names = FALSE)
# Display summary of the calculated means
cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n")
summary(exp1_data$domain_mean)
cat("\nDGEN mean summary (average of all 8 DGEN variables):\n")
summary(exp1_data$DGEN_mean)
# Show first few rows to verify calculations
cat("\nFirst 5 rows of calculated means:\n")
print(exp1_data[1:5, c("domain_mean", "DGEN_mean")])
# Show the individual DGEN values for first 5 rows to verify math
cat("\nFirst 5 rows of individual DGEN values for verification:\n")
print(exp1_data[1:5, dgen_vars])

View File

@ -0,0 +1,104 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
ehi1 <- read.csv("ehi1.csv")
# Create EHI difference variables (NPast - NFut)
# Preferences
ehi1$ehi_pref_read <- ehi1$NPastDiff_pref_read - ehi1$NFutDiff_pref_read
ehi1$ehi_pref_music <- ehi1$NPastDiff_pref_music - ehi1$NFutDiff_pref_music
ehi1$ehi_pref_tv <- ehi1$NPastDiff_pref_tv - ehi1$NFutDiff_pref_tv
ehi1$ehi_pref_nap <- ehi1$NPastDiff_pref_nap - ehi1$NFutDiff_pref_nap
ehi1$ehi_pref_travel <- ehi1$NPastDiff_pref_travel - ehi1$NFutDiff_pref_travel
# Personality
ehi1$ehi_pers_extravert <- ehi1$NPastDiff_pers_extravert - ehi1$NFutDiff_pers_extravert
ehi1$ehi_pers_critical <- ehi1$NPastDiff_pers_critical - ehi1$NFutDiff_pers_critical
ehi1$ehi_pers_dependable <- ehi1$NPastDiff_pers_dependable - ehi1$NFutDiff_pers_dependable
ehi1$ehi_pers_anxious <- ehi1$NPastDiff_pers_anxious - ehi1$NFutDiff_pers_anxious
ehi1$ehi_pers_complex <- ehi1$NPastDiff_pers_complex - ehi1$NFutDiff_pers_complex
# Values
ehi1$ehi_val_obey <- ehi1$NPastDiff_val_obey - ehi1$NFutDiff_val_obey
ehi1$ehi_val_trad <- ehi1$NPastDiff_val_trad - ehi1$NFutDiff_val_trad
ehi1$ehi_val_opinion <- ehi1$NPastDiff_val_opinion - ehi1$NFutDiff_val_opinion
ehi1$ehi_val_performance <- ehi1$NPastDiff_val_performance - ehi1$NFutDiff_val_performance
ehi1$ehi_val_justice <- ehi1$NPastDiff_val_justice - ehi1$NFutDiff_val_justice
# Life satisfaction
ehi1$ehi_life_ideal <- ehi1$NPastDiff_life_ideal - ehi1$NFutDiff_life_ideal
ehi1$ehi_life_excellent <- ehi1$NPastDiff_life_excellent - ehi1$NFutDiff_life_excellent
ehi1$ehi_life_satisfied <- ehi1$NPastDiff_life_satisfied - ehi1$NFutDiff_life_satisfied
ehi1$ehi_life_important <- ehi1$NPastDiff_life_important - ehi1$NFutDiff_life_important
ehi1$ehi_life_change <- ehi1$NPastDiff_life_change - ehi1$NFutDiff_life_change
# QA: Verify calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI difference calculations (NPast - NFut)\n\n")
qa_pairs <- list(
list(npast = "NPastDiff_pref_read", nfut = "NFutDiff_pref_read", target = "ehi_pref_read"),
list(npast = "NPastDiff_pref_music", nfut = "NFutDiff_pref_music", target = "ehi_pref_music"),
list(npast = "NPastDiff_pref_tv", nfut = "NFutDiff_pref_tv", target = "ehi_pref_tv"),
list(npast = "NPastDiff_pref_nap", nfut = "NFutDiff_pref_nap", target = "ehi_pref_nap"),
list(npast = "NPastDiff_pref_travel", nfut = "NFutDiff_pref_travel", target = "ehi_pref_travel"),
list(npast = "NPastDiff_pers_extravert", nfut = "NFutDiff_pers_extravert", target = "ehi_pers_extravert"),
list(npast = "NPastDiff_pers_critical", nfut = "NFutDiff_pers_critical", target = "ehi_pers_critical"),
list(npast = "NPastDiff_pers_dependable", nfut = "NFutDiff_pers_dependable", target = "ehi_pers_dependable"),
list(npast = "NPastDiff_pers_anxious", nfut = "NFutDiff_pers_anxious", target = "ehi_pers_anxious"),
list(npast = "NPastDiff_pers_complex", nfut = "NFutDiff_pers_complex", target = "ehi_pers_complex"),
list(npast = "NPastDiff_val_obey", nfut = "NFutDiff_val_obey", target = "ehi_val_obey"),
list(npast = "NPastDiff_val_trad", nfut = "NFutDiff_val_trad", target = "ehi_val_trad"),
list(npast = "NPastDiff_val_opinion", nfut = "NFutDiff_val_opinion", target = "ehi_val_opinion"),
list(npast = "NPastDiff_val_performance", nfut = "NFutDiff_val_performance", target = "ehi_val_performance"),
list(npast = "NPastDiff_val_justice", nfut = "NFutDiff_val_justice", target = "ehi_val_justice"),
list(npast = "NPastDiff_life_ideal", nfut = "NFutDiff_life_ideal", target = "ehi_life_ideal"),
list(npast = "NPastDiff_life_excellent", nfut = "NFutDiff_life_excellent", target = "ehi_life_excellent"),
list(npast = "NPastDiff_life_satisfied", nfut = "NFutDiff_life_satisfied", target = "ehi_life_satisfied"),
list(npast = "NPastDiff_life_important", nfut = "NFutDiff_life_important", target = "ehi_life_important"),
list(npast = "NPastDiff_life_change", nfut = "NFutDiff_life_change", target = "ehi_life_change")
)
all_checks_passed <- TRUE
for (pair in qa_pairs) {
# Calculate expected difference
expected_diff <- ehi1[[pair$npast]] - ehi1[[pair$nfut]]
# Get actual value in target variable
actual_value <- ehi1[[pair$target]]
# Compare (allowing for floating point precision issues)
discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)
if (length(discrepancies) > 0) {
cat(sprintf("FAIL: %s\n", pair$target))
cat(sprintf(" Found %d discrepancies in rows: %s\n",
length(discrepancies),
paste(head(discrepancies, 10), collapse = ", ")))
# Show first discrepancy details
row_num <- discrepancies[1]
cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
row_num,
pair$npast, ehi1[[pair$npast]][row_num],
pair$nfut, ehi1[[pair$nfut]][row_num],
expected_diff[row_num],
pair$target, actual_value[row_num]))
all_checks_passed <- FALSE
} else {
cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(ehi1)))
}
}
cat("\n")
if (all_checks_passed) {
cat("*** ALL QA CHECKS PASSED ***\n")
} else {
cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}
# Save updated dataset
write.csv(ehi1, "ehi1.csv", row.names = FALSE)
cat("\nDataset saved to ehi1.csv\n")

View File

@ -0,0 +1,167 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
ehi1 <- read.csv("ehi1.csv")
# Item-name vectors for each EHI subscale (5 items each) — declared once so
# the global mean is guaranteed to use exactly the same 20 columns.
pref_items <- c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv",
                "ehi_pref_nap", "ehi_pref_travel")
pers_items <- c("ehi_pers_extravert", "ehi_pers_critical",
                "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex")
val_items <- c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion",
               "ehi_val_performance", "ehi_val_justice")
life_items <- c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied",
                "ehi_life_important", "ehi_life_change")
# Subscale means (NA-tolerant row means)
ehi1$ehi_pref_mean <- rowMeans(ehi1[, pref_items], na.rm = TRUE)
ehi1$ehi_pers_mean <- rowMeans(ehi1[, pers_items], na.rm = TRUE)
ehi1$ehi_val_mean <- rowMeans(ehi1[, val_items], na.rm = TRUE)
ehi1$ehi_life_mean <- rowMeans(ehi1[, life_items], na.rm = TRUE)
# Global mean across all 20 items
ehi1$ehi_global_mean <- rowMeans(ehi1[, c(pref_items, pers_items,
                                          val_items, life_items)], na.rm = TRUE)
# QA: Verify mean calculations by recomputing row means for the first 5 rows
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI mean calculations\n\n")
# Helper: for rows 1-5, recompute the mean of `vars` and compare it against
# the stored column values, printing one line per row with a check mark.
# 5-item subscales print the individual values; the 20-item global mean
# prints a summary line instead.
qa_first5 <- function(vars, stored) {
  for (i in 1:5) {
    vals <- unlist(ehi1[i, vars], use.names = FALSE)
    recomputed <- mean(vals, na.rm = TRUE)
    ok <- abs(recomputed - stored[i]) < 1e-10
    flag <- ifelse(ok, "✓", "✗")
    if (length(vals) == 5) {
      cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
                  i, vals[1], vals[2], vals[3], vals[4], vals[5],
                  recomputed, stored[i], flag))
    } else {
      cat(sprintf("Row %d: 20 values → Calculated: %.5f | Actual: %.5f %s\n",
                  i, recomputed, stored[i], flag))
    }
  }
}
pref_vars <- c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv",
               "ehi_pref_nap", "ehi_pref_travel")
pers_vars <- c("ehi_pers_extravert", "ehi_pers_critical",
               "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex")
val_vars <- c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion",
              "ehi_val_performance", "ehi_val_justice")
life_vars <- c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied",
               "ehi_life_important", "ehi_life_change")
cat("--- FIRST 5 ROWS: PREFERENCES MEAN ---\n")
qa_first5(pref_vars, ehi1$ehi_pref_mean)
cat("\n--- FIRST 5 ROWS: PERSONALITY MEAN ---\n")
qa_first5(pers_vars, ehi1$ehi_pers_mean)
cat("\n--- FIRST 5 ROWS: VALUES MEAN ---\n")
qa_first5(val_vars, ehi1$ehi_val_mean)
cat("\n--- FIRST 5 ROWS: LIFE SATISFACTION MEAN ---\n")
qa_first5(life_vars, ehi1$ehi_life_mean)
cat("\n--- FIRST 5 ROWS: GLOBAL MEAN (20 variables) ---\n")
qa_first5(c(pref_vars, pers_vars, val_vars, life_vars), ehi1$ehi_global_mean)
# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")
qa_checks <- list(
  list(vars = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv", "ehi_pref_nap", "ehi_pref_travel"),
       target = "ehi_pref_mean", name = "Preferences"),
  list(vars = c("ehi_pers_extravert", "ehi_pers_critical", "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex"),
       target = "ehi_pers_mean", name = "Personality"),
  list(vars = c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion", "ehi_val_performance", "ehi_val_justice"),
       target = "ehi_val_mean", name = "Values"),
  list(vars = c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied", "ehi_life_important", "ehi_life_change"),
       target = "ehi_life_mean", name = "Life Satisfaction"),
  list(vars = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv", "ehi_pref_nap", "ehi_pref_travel",
                "ehi_pers_extravert", "ehi_pers_critical", "ehi_pers_dependable", "ehi_pers_anxious", "ehi_pers_complex",
                "ehi_val_obey", "ehi_val_trad", "ehi_val_opinion", "ehi_val_performance", "ehi_val_justice",
                "ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied", "ehi_life_important", "ehi_life_change"),
       target = "ehi_global_mean", name = "Global")
)
# Run one check: recompute the row means over all rows, compare to the stored
# column, print PASS/FAIL, and return TRUE when the column is consistent.
run_check <- function(check) {
  recomputed <- rowMeans(ehi1[, check$vars], na.rm = TRUE)
  stored <- ehi1[[check$target]]
  bad <- which(abs(recomputed - stored) > 1e-10)
  if (length(bad) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                check$name, length(check$vars), nrow(ehi1)))
    return(TRUE)
  }
  cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars)))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(bad),
              paste(head(bad, 10), collapse = ", ")))
  FALSE
}
all_checks_passed <- all(vapply(qa_checks, run_check, logical(1)))
cat("\n")
cat(if (all_checks_passed) "*** ALL QA CHECKS PASSED ***\n" else "*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
# Save updated dataset
write.csv(ehi1, "ehi1.csv", row.names = FALSE)
cat("\nDataset saved to ehi1.csv\n")

View File

@ -0,0 +1,38 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
data <- read.csv("ehi1.csv")
# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))
# Also show the unique values and their frequencies.
# BUG FIX: print("\n...") displays the escape sequence literally
# ('[1] "\nUnique..."'); cat() is required to emit a real newline.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))
# Recode demo_edu into 3 ordinal levels; values not matched below stay NA
data$edu3 <- NA
# HS_TS: High School and Trade School
data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS"
# C_Ug: College and University - Undergraduate
data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug"
# grad_prof: University - Graduate, University - PhD, and Professional Degree
data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof"
# Convert to ordered factor (HS_TS < C_Ug < grad_prof)
data$edu3 <- factor(data$edu3,
levels = c("HS_TS", "C_Ug", "grad_prof"),
ordered = TRUE)
# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))
# Verify the recoding: cross-tab of original vs recoded; any demo_edu value
# not covered by the mappings above shows up in the NA column here
print(table(data$demo_edu, data$edu3, useNA = "ifany"))
# Save the updated dataset with the new edu3 variable
write.csv(data, "ehi1.csv", row.names = FALSE)

View File

@ -0,0 +1,14 @@
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
"eohiDGEN_pref",1,1063,0.31138,2.29712,0,0.27497,1.4826,-10,10,20,0.16174,2.49869,0.07046
"eohiDGEN_pers",2,1063,0.44309,2.40063,0,0.39365,1.4826,-10,10,20,0.04618,2.20622,0.07363
"eohiDGEN_val",3,1063,0.49012,2.38159,0,0.3772,1.4826,-10,10,20,0.37348,2.6545,0.07305
"eohiDGEN_life",4,1063,0.41016,2.72624,0,0.3866,1.4826,-10,10,20,0.06705,1.55144,0.08362
"eohiDGEN_mean",5,1063,0.41488,1.7406,0,0.34323,0.99334,-9,9.33,18.33,0.43898,2.96172,0.05339
"ehi_pref_mean",6,1063,0.13885,0.64266,0,0.09683,0.29652,-2.2,3.2,5.4,0.93756,3.43951,0.01971
"ehi_pers_mean",7,1063,0.09614,0.71979,0,0.07027,0.59304,-3.6,3.8,7.4,0.53524,3.53968,0.02208
"ehi_val_mean",8,1063,0.19699,0.72241,0,0.14266,0.29652,-3.4,4.4,7.8,1.17086,5.08196,0.02216
"ehi_life_mean",9,1063,0.1127,1.12639,0,0.06792,0.59304,-4.8,6,10.8,0.45879,2.97822,0.03455
"ehi_global_mean",10,1063,0.13617,0.50405,0.05,0.09871,0.37065,-1.65,2.65,4.3,1.09805,3.7645,0.01546
"AOT_total",11,1063,0.6945,0.60845,0.75,0.69859,0.7413,-1.125,2,3.125,-0.09166,-0.61789,0.01866
"CRT_correct",12,1063,0.3132,0.36479,0.33,0.26666,0.48926,0,1,1,0.79518,-0.77639,0.01119
"CRT_int",13,1063,0.614,0.36086,0.67,0.6424,0.48926,0,1,1,-0.45012,-1.11195,0.01107
1 vars n mean sd median trimmed mad min max range skew kurtosis se
1 vars n mean sd median trimmed mad min max range skew kurtosis se
2 eohiDGEN_pref 1 1063 0.31138 2.29712 0 0.27497 1.4826 -10 10 20 0.16174 2.49869 0.07046
3 eohiDGEN_pers 2 1063 0.44309 2.40063 0 0.39365 1.4826 -10 10 20 0.04618 2.20622 0.07363
4 eohiDGEN_val 3 1063 0.49012 2.38159 0 0.3772 1.4826 -10 10 20 0.37348 2.6545 0.07305
5 eohiDGEN_life 4 1063 0.41016 2.72624 0 0.3866 1.4826 -10 10 20 0.06705 1.55144 0.08362
6 eohiDGEN_mean 5 1063 0.41488 1.7406 0 0.34323 0.99334 -9 9.33 18.33 0.43898 2.96172 0.05339
7 ehi_pref_mean 6 1063 0.13885 0.64266 0 0.09683 0.29652 -2.2 3.2 5.4 0.93756 3.43951 0.01971
8 ehi_pers_mean 7 1063 0.09614 0.71979 0 0.07027 0.59304 -3.6 3.8 7.4 0.53524 3.53968 0.02208
9 ehi_val_mean 8 1063 0.19699 0.72241 0 0.14266 0.29652 -3.4 4.4 7.8 1.17086 5.08196 0.02216
10 ehi_life_mean 9 1063 0.1127 1.12639 0 0.06792 0.59304 -4.8 6 10.8 0.45879 2.97822 0.03455
11 ehi_global_mean 10 1063 0.13617 0.50405 0.05 0.09871 0.37065 -1.65 2.65 4.3 1.09805 3.7645 0.01546
12 AOT_total 11 1063 0.6945 0.60845 0.75 0.69859 0.7413 -1.125 2 3.125 -0.09166 -0.61789 0.01866
13 CRT_correct 12 1063 0.3132 0.36479 0.33 0.26666 0.48926 0 1 1 0.79518 -0.77639 0.01119
14 CRT_int 13 1063 0.614 0.36086 0.67 0.6424 0.48926 0 1 1 -0.45012 -1.11195 0.01107

View File

@ -0,0 +1,107 @@
library(tidyverse)
library(ggplot2)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read data
data <- read.csv("exp1.csv")
# Select variables ending exactly with _T or _F
df <- data %>% select(matches("(_T|_F)$"))
# Remove demo_f variable (if present)
df <- df %>% select(-any_of("demo_f"))
str(df)
# Coerce to numeric where possible (without breaking non-numeric)
df_num <- df %>%
  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))
# Compute count and proportion correct per variable.
# Item names encode an intended difficulty (e.g. "gest_75_T" -> 75), which
# maps to an expected proportion-correct range checked below.
descriptives <- purrr::imap_dfr(df_num, function(col, name) {
  x <- suppressWarnings(as.numeric(col))
  x <- x[!is.na(x)]
  n_total <- length(x)
  n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1)
  prop <- if (n_total == 0) NA_real_ else n_correct / n_total
  # Extract difficulty number from variable name and map to expected range.
  # suppressWarnings: names without a numeric suffix yield NA quietly
  # instead of emitting a coercion warning per column.
  difficulty_num <- suppressWarnings(as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)))
  expected_ranges <- list(
    "15" = c(0.15, 0.25),
    "35" = c(0.35, 0.45),
    "55" = c(0.55, 0.65),
    "75" = c(0.75, 0.85)
  )
  # BUG FIX: when a column has no usable values, prop is NA and the original
  # `if (prop >= ...)` comparison errored with
  # "missing value where TRUE/FALSE needed"; report UNKNOWN instead.
  if (is.na(prop) || !(as.character(difficulty_num) %in% names(expected_ranges))) {
    match_difficulty <- "UNKNOWN"
  } else {
    expected_range <- expected_ranges[[as.character(difficulty_num)]]
    match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO"
  }
  tibble(
    variable = name,
    n_total = n_total,
    n_correct = n_correct,
    prop_correct = round(prop, 5),
    match_difficulty = match_difficulty
  )
}) %>%
  arrange(variable)
# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin
bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09))
bin_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(0.10, 1.00, by = 0.10),
  right = FALSE,
  include.lowest = FALSE,
  labels = bin_levels
)
bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")
# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94
bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09))
bin15_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(0.15, 0.95, by = 0.10),
  right = FALSE,
  include.lowest = FALSE,
  labels = bin15_levels
)
bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")
# View
print(descriptives, n = Inf)
cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n")
print(bin_counts, n = Inf)
cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n")
print(bin15_counts, n = Inf)
# Histogram of proportion correct with custom bins
histogram <- ggplot(descriptives, aes(x = prop_correct)) +
  geom_histogram(
    breaks = seq(0.15, 0.95, by = 0.10),
    fill = "lightblue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Distribution of Proportion Correct",
    x = "Proportion Correct",
    y = "Number of Variables"
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = seq(0.15, 0.95, by = 0.10))
print(histogram)
# Optionally save
# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")

View File

@ -0,0 +1,88 @@
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
options(scipen = 999)
df <- read.csv("ehi1.csv")
library(psych)
library(knitr)
# fixed-decimal formatter (five decimals)
fmt5 <- function(x) formatC(x, format = "f", digits = 5)
# Select the 2 composite variables for reliability analysis (complete cases only)
# (original comment said "4 variables" — only two columns are used)
reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]),
c("eohiDGEN_mean", "ehi_global_mean")]
# Cronbach's Alpha
alpha_result <- alpha(reliability_data)
# Split-half reliability
split_half <- splitHalf(reliability_data)
# Inter-item correlations
cor_matrix <- cor(reliability_data, use = "complete.obs")
# Two-item reliability summary (if applicable)
two_item_section <- ""
if (ncol(reliability_data) == 2) {
n_complete <- sum(complete.cases(reliability_data))
r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson")
r_spearman <- suppressWarnings(cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "spearman"))
# Fisher z CI for r
fisher_z <- atanh(r_pearson)
se_z <- 1 / sqrt(n_complete - 3)
z_crit <- qnorm(0.975)
ci_z_lower <- fisher_z - z_crit * se_z
ci_z_upper <- fisher_z + z_crit * se_z
ci_r_lower <- tanh(ci_z_lower)
ci_r_upper <- tanh(ci_z_upper)
# Approximate Fisher z CI for Spearman rho (large-sample approximation)
fisher_z_s <- atanh(r_spearman)
ci_zs_lower <- fisher_z_s - z_crit * se_z
ci_zs_upper <- fisher_z_s + z_crit * se_z
ci_s_lower <- tanh(ci_zs_lower)
ci_s_upper <- tanh(ci_zs_upper)
# Spearman-Brown / Cronbach's alpha for k = 2
alpha_sb <- (2 * r_pearson) / (1 + r_pearson)
alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower)
alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper)
# BUG FIX: report text read "SpearmanBrown" (hyphen lost to mis-encoding)
two_item_section <- paste0(
"<h2>Two-Item Reliability Summary</h2>",
"<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>",
"<p>Spearman r: ", fmt5(r_spearman), " (95% CI: [", fmt5(ci_s_lower), ", ", fmt5(ci_s_upper), "])</p>",
"<p>Spearman-Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb),
" (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>"
)
}
# Create a summary table of descriptive statistics for the report
summary_table <- data.frame(
Variable = names(reliability_data),
n = nrow(reliability_data),
Mean = round(colMeans(reliability_data, na.rm = TRUE), 5),
SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5),
Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5),
Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5),
Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5)
)
# BUG FIX: summary_table and cor_matrix were computed but never written into
# the report; both are now included.
html_output <- paste0(
"<html><head><title>EHI Reliability Analysis</title></head><body>",
"<h1>EHI Reliability Analysis</h1>",
"<h2>Descriptive Statistics</h2>",
kable(summary_table, format = "html"),
two_item_section,
"<h2>Cronbach's Alpha</h2>",
kable(alpha_result$total, format = "html"),
"<h2>Inter-Item Correlations</h2>",
kable(cor_matrix, format = "html"),
"<h2>Split-Half Reliability</h2>",
"<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>",
"<h2>Item-Level Statistics</h2>",
kable(alpha_result$item.stats, format = "html"),
"</body></html>"
)
writeLines(html_output, "EHI reliability.html")

Binary file not shown.

Before

Width:  |  Height:  |  Size: 177 KiB

After

Width:  |  Height:  |  Size: 177 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 176 KiB

After

Width:  |  Height:  |  Size: 176 KiB

1064
eohi1/ehi1 - Copy.csv Normal file

File diff suppressed because it is too large Load Diff

1064
eohi1/ehi1.csv Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,81 @@
eohi_var,cal_var,n,pearson_r,pearson_ci_lower,pearson_ci_upper,pearson_p,spearman_rho,spearman_ci_lower,spearman_ci_upper,spearman_p
eohi_mean,cal_selfActual,1063,-0.09447,-0.15019,-0.0362,0.00205,-0.11288,-0.1699,-0.05351,0.00023
eohi_val,cal_selfActual,1063,-0.09384,-0.15347,-0.03616,0.00219,-0.09963,-0.16267,-0.04046,0.00114
eohi_pers,cal_selfActual,1063,-0.07652,-0.12898,-0.01613,0.01258,-0.08281,-0.14357,-0.02535,0.00691
eohiDGEN_pref,cal_15,1063,0.11449,0.04892,0.17239,0.00018,0.08071,0.01776,0.13995,0.00847
eohi_pers,cal_55,1063,-0.06004,-0.1185,-0.0065,0.05036,-0.07699,-0.13816,-0.01519,0.01204
eohi_mean,cal_55,1063,-0.06632,-0.11782,-0.01552,0.03061,-0.07588,-0.13634,-0.01868,0.01333
eohiDGEN_val,cal_55,1063,-0.05006,-0.1073,0.00598,0.10281,-0.06964,-0.12753,-0.01077,0.02318
eohiDGEN_mean,cal_true,1063,0.05754,-0.0009,0.11652,0.06074,0.0676,0.00725,0.12602,0.02754
eohiDGEN_life,cal_75,1063,-0.04564,-0.10516,0.01314,0.13702,-0.05638,-0.11573,0.00538,0.06615
eohi_life,cal_75,1063,-0.03875,-0.09577,0.01879,0.20683,-0.05565,-0.11564,0.00491,0.06974
eohiDGEN_mean,cal_15,1063,0.07628,0.01598,0.13615,0.01285,0.05395,-0.00598,0.11493,0.0787
eohi_val,cal_55,1063,-0.05666,-0.10898,-0.00182,0.06479,-0.05346,-0.10885,0.00265,0.0815
eohiDGEN_life,cal_55,1063,0.05044,-0.00773,0.10614,0.10026,0.04956,-0.00878,0.11087,0.10635
eohiDGEN_pref,cal_true,1063,0.07292,0.0095,0.134,0.01741,0.04803,-0.0094,0.108,0.11761
eohi_pref,cal_selfActual,1063,-0.03281,-0.09762,0.032,0.28519,-0.04423,-0.10373,0.01867,0.14958
eohiDGEN_val,cal_false,1063,-0.02787,-0.09239,0.03411,0.36398,-0.04393,-0.10451,0.01416,0.15236
eohiDGEN_pers,cal_15,1063,0.04714,-0.01715,0.10944,0.12451,0.04308,-0.01746,0.10317,0.16046
eohiDGEN_life,cal_false,1063,0.03476,-0.02508,0.09492,0.25753,0.04047,-0.02102,0.10411,0.18735
eohi_life,cal_true,1063,-0.02269,-0.08192,0.0416,0.45982,-0.03779,-0.0985,0.01964,0.21833
eohiDGEN_life,cal_15,1063,0.05091,-0.01796,0.11623,0.09709,0.03755,-0.02524,0.09835,0.22121
eohiDGEN_life,cal_true,1063,-0.00526,-0.06612,0.05833,0.86395,-0.03744,-0.09633,0.02643,0.22263
eohiDGEN_life,cal_selfActual,1063,0.02779,-0.03579,0.0868,0.36538,0.03688,-0.02625,0.10029,0.22962
eohi_val,cal_75,1063,-0.03432,-0.09303,0.02511,0.26362,-0.03548,-0.09664,0.02788,0.24775
eohiDGEN_val,cal_selfActual,1063,0.02114,-0.04078,0.08546,0.49105,0.03547,-0.02636,0.09663,0.24789
eohiDGEN_pers,cal_true,1063,0.02354,-0.03617,0.08422,0.44334,0.03532,-0.02544,0.09158,0.24994
eohiDGEN_pers,cal_35,1063,-0.03495,-0.09252,0.02312,0.25493,-0.03489,-0.09183,0.02535,0.25578
eohiDGEN_pers,cal_75,1063,-0.01611,-0.07516,0.04168,0.59971,-0.03379,-0.08928,0.02687,0.271
eohiDGEN_pers,cal_false,1063,-0.02503,-0.08151,0.03334,0.4149,-0.03266,-0.08708,0.02624,0.28745
eohiDGEN_val,cal_global,1063,-0.00447,-0.05803,0.05482,0.8843,-0.03204,-0.08825,0.0252,0.29666
eohiDGEN_mean,cal_55,1063,-0.03699,-0.08943,0.01668,0.2282,-0.02929,-0.08473,0.03097,0.34006
eohiDGEN_pers,cal_selfActual,1063,-0.03827,-0.10619,0.02517,0.21254,-0.02835,-0.09145,0.03191,0.35572
eohi_life,cal_selfActual,1063,0.04737,-0.01652,0.10847,0.12274,0.02825,-0.03176,0.08666,0.35753
eohi_mean,cal_75,1063,-0.01187,-0.06715,0.04855,0.69916,-0.0275,-0.0859,0.03279,0.37045
eohiDGEN_val,cal_15,1063,0.00914,-0.04676,0.06618,0.76607,-0.02707,-0.08456,0.02837,0.37789
eohi_pref,cal_55,1063,-0.02632,-0.08106,0.02643,0.39136,-0.02524,-0.08705,0.03558,0.41101
eohiDGEN_pref,cal_35,1063,-0.00852,-0.0681,0.0469,0.78147,-0.02501,-0.08136,0.03028,0.41525
eohi_val,cal_global,1063,-0.03245,-0.08628,0.02097,0.29049,-0.02445,-0.0837,0.03405,0.42589
eohiDGEN_pref,cal_global,1063,0.05135,-0.00854,0.10781,0.09423,0.02438,-0.03698,0.07747,0.42716
eohi_mean,cal_global,1063,-0.00987,-0.0669,0.04625,0.74782,-0.02423,-0.08093,0.03538,0.43002
eohi_mean,cal_false,1063,0.00103,-0.05609,0.05796,0.97329,-0.02413,-0.08528,0.034,0.43184
eohi_pers,cal_15,1063,0.0424,-0.01973,0.10006,0.16717,0.02377,-0.03499,0.07967,0.4388
eohiDGEN_pref,cal_75,1063,0.04194,-0.01865,0.10462,0.17182,0.02253,-0.03687,0.08394,0.46315
eohi_val,cal_35,1063,0.00919,-0.04488,0.06395,0.76478,-0.02132,-0.07724,0.0359,0.48748
eohiDGEN_mean,cal_75,1063,0.02182,-0.03779,0.08058,0.47734,0.02086,-0.0387,0.08009,0.49684
eohi_val,cal_15,1063,-0.0114,-0.06675,0.04342,0.71036,0.02045,-0.0384,0.08265,0.50533
eohiDGEN_mean,cal_global,1063,0.01728,-0.04015,0.07341,0.5736,0.01974,-0.03874,0.07654,0.52029
eohiDGEN_val,cal_true,1063,0.03211,-0.02651,0.08716,0.29554,0.01936,-0.04555,0.08179,0.52834
eohi_mean,cal_15,1063,0.02437,-0.03634,0.08546,0.42732,0.01772,-0.03986,0.07648,0.56377
eohiDGEN_pers,cal_55,1063,-0.02109,-0.07397,0.03478,0.49211,-0.0175,-0.0759,0.04546,0.56869
eohiDGEN_mean,cal_selfActual,1063,0.00131,-0.05794,0.06413,0.96594,0.0169,-0.04525,0.07532,0.582
eohi_life,cal_false,1063,0.02929,-0.02351,0.0794,0.34004,0.01689,-0.04339,0.07467,0.58238
eohiDGEN_mean,cal_false,1063,-0.02032,-0.07402,0.03873,0.5082,-0.01576,-0.07583,0.04145,0.60773
eohiDGEN_pref,cal_55,1063,-0.01025,-0.064,0.04642,0.73848,-0.01542,-0.0708,0.046,0.61559
eohi_life,cal_35,1063,0.03599,-0.02203,0.08892,0.24108,0.01538,-0.04474,0.07414,0.61636
eohi_pers,cal_false,1063,-0.00935,-0.06513,0.04644,0.76086,-0.01485,-0.07336,0.04336,0.62859
eohi_life,cal_15,1063,0.0156,-0.0452,0.07483,0.61145,0.01432,-0.04315,0.07438,0.64092
eohi_pers,cal_true,1063,0.0055,-0.05876,0.07464,0.85794,-0.01291,-0.06976,0.04539,0.67419
eohi_pref,cal_75,1063,0.02725,-0.03153,0.08793,0.37474,0.01264,-0.0447,0.07576,0.68049
eohi_mean,cal_true,1063,-0.01786,-0.07676,0.04265,0.56079,-0.01207,-0.07091,0.05063,0.69435
eohi_val,cal_false,1063,-0.01276,-0.07529,0.04177,0.6777,-0.0115,-0.0741,0.04742,0.7081
eohi_pers,cal_35,1063,0.01428,-0.04037,0.07111,0.6418,0.01092,-0.04486,0.06885,0.7222
eohiDGEN_pers,cal_global,1063,-0.00723,-0.06312,0.05198,0.81387,-0.01039,-0.06483,0.05266,0.73501
eohi_mean,cal_35,1063,0.02233,-0.03257,0.08046,0.46697,-0.01029,-0.06915,0.04512,0.73748
eohi_pref,cal_global,1063,0.0183,-0.03841,0.07643,0.55118,0.00972,-0.0475,0.07007,0.75165
eohi_pers,cal_global,1063,-0.00467,-0.06118,0.05112,0.87911,-0.00916,-0.06635,0.05143,0.76546
eohiDGEN_pref,cal_selfActual,1063,0.02085,-0.03875,0.07863,0.49712,0.00839,-0.04939,0.06846,0.78474
eohi_val,cal_true,1063,-0.0358,-0.0949,0.02654,0.24352,-0.0081,-0.07342,0.05105,0.79199
eohi_pref,cal_15,1063,0.02312,-0.04369,0.0815,0.45148,0.00755,-0.05412,0.06986,0.80582
eohi_pref,cal_false,1063,0.02725,-0.03088,0.08754,0.37477,0.00736,-0.05157,0.06691,0.8106
eohiDGEN_pref,cal_false,1063,0.00873,-0.04608,0.05948,0.77614,0.00453,-0.05188,0.05691,0.88276
eohi_pref,cal_35,1063,0.02663,-0.03409,0.08569,0.38574,0.00424,-0.05505,0.06505,0.89016
eohiDGEN_life,cal_global,1063,0.02651,-0.03474,0.08515,0.38781,0.00414,-0.06008,0.06383,0.89275
eohiDGEN_val,cal_35,1063,0.0047,-0.04819,0.06294,0.87827,-0.00387,-0.06182,0.05569,0.89972
eohi_pers,cal_75,1063,-0.01501,-0.08009,0.05265,0.62493,-0.00292,-0.06324,0.06169,0.92424
eohi_life,cal_global,1063,0.01135,-0.04298,0.06703,0.71158,-0.00291,-0.0638,0.0565,0.92459
eohiDGEN_life,cal_35,1063,0.01256,-0.04397,0.071,0.68263,-0.00226,-0.06355,0.06168,0.94137
eohi_life,cal_55,1063,0.0154,-0.0459,0.07265,0.61609,0.00223,-0.05962,0.05937,0.94216
eohiDGEN_mean,cal_35,1063,-0.01765,-0.07148,0.0442,0.5654,-0.0016,-0.05665,0.05409,0.95856
eohiDGEN_val,cal_75,1063,0.02365,-0.04062,0.08878,0.44107,-0.00147,-0.06785,0.06103,0.96186
eohi_pref,cal_true,1063,-0.00826,-0.06273,0.05423,0.78791,0.00098,-0.05588,0.06502,0.97445
1 eohi_var cal_var n pearson_r pearson_ci_lower pearson_ci_upper pearson_p spearman_rho spearman_ci_lower spearman_ci_upper spearman_p
1 eohi_var cal_var n pearson_r pearson_ci_lower pearson_ci_upper pearson_p spearman_rho spearman_ci_lower spearman_ci_upper spearman_p
2 eohi_mean cal_selfActual 1063 -0.09447 -0.15019 -0.0362 0.00205 -0.11288 -0.1699 -0.05351 0.00023
3 eohi_val cal_selfActual 1063 -0.09384 -0.15347 -0.03616 0.00219 -0.09963 -0.16267 -0.04046 0.00114
4 eohi_pers cal_selfActual 1063 -0.07652 -0.12898 -0.01613 0.01258 -0.08281 -0.14357 -0.02535 0.00691
5 eohiDGEN_pref cal_15 1063 0.11449 0.04892 0.17239 0.00018 0.08071 0.01776 0.13995 0.00847
6 eohi_pers cal_55 1063 -0.06004 -0.1185 -0.0065 0.05036 -0.07699 -0.13816 -0.01519 0.01204
7 eohi_mean cal_55 1063 -0.06632 -0.11782 -0.01552 0.03061 -0.07588 -0.13634 -0.01868 0.01333
8 eohiDGEN_val cal_55 1063 -0.05006 -0.1073 0.00598 0.10281 -0.06964 -0.12753 -0.01077 0.02318
9 eohiDGEN_mean cal_true 1063 0.05754 -0.0009 0.11652 0.06074 0.0676 0.00725 0.12602 0.02754
10 eohiDGEN_life cal_75 1063 -0.04564 -0.10516 0.01314 0.13702 -0.05638 -0.11573 0.00538 0.06615
11 eohi_life cal_75 1063 -0.03875 -0.09577 0.01879 0.20683 -0.05565 -0.11564 0.00491 0.06974
12 eohiDGEN_mean cal_15 1063 0.07628 0.01598 0.13615 0.01285 0.05395 -0.00598 0.11493 0.0787
13 eohi_val cal_55 1063 -0.05666 -0.10898 -0.00182 0.06479 -0.05346 -0.10885 0.00265 0.0815
14 eohiDGEN_life cal_55 1063 0.05044 -0.00773 0.10614 0.10026 0.04956 -0.00878 0.11087 0.10635
15 eohiDGEN_pref cal_true 1063 0.07292 0.0095 0.134 0.01741 0.04803 -0.0094 0.108 0.11761
16 eohi_pref cal_selfActual 1063 -0.03281 -0.09762 0.032 0.28519 -0.04423 -0.10373 0.01867 0.14958
17 eohiDGEN_val cal_false 1063 -0.02787 -0.09239 0.03411 0.36398 -0.04393 -0.10451 0.01416 0.15236
18 eohiDGEN_pers cal_15 1063 0.04714 -0.01715 0.10944 0.12451 0.04308 -0.01746 0.10317 0.16046
19 eohiDGEN_life cal_false 1063 0.03476 -0.02508 0.09492 0.25753 0.04047 -0.02102 0.10411 0.18735
20 eohi_life cal_true 1063 -0.02269 -0.08192 0.0416 0.45982 -0.03779 -0.0985 0.01964 0.21833
21 eohiDGEN_life cal_15 1063 0.05091 -0.01796 0.11623 0.09709 0.03755 -0.02524 0.09835 0.22121
22 eohiDGEN_life cal_true 1063 -0.00526 -0.06612 0.05833 0.86395 -0.03744 -0.09633 0.02643 0.22263
23 eohiDGEN_life cal_selfActual 1063 0.02779 -0.03579 0.0868 0.36538 0.03688 -0.02625 0.10029 0.22962
24 eohi_val cal_75 1063 -0.03432 -0.09303 0.02511 0.26362 -0.03548 -0.09664 0.02788 0.24775
25 eohiDGEN_val cal_selfActual 1063 0.02114 -0.04078 0.08546 0.49105 0.03547 -0.02636 0.09663 0.24789
26 eohiDGEN_pers cal_true 1063 0.02354 -0.03617 0.08422 0.44334 0.03532 -0.02544 0.09158 0.24994
27 eohiDGEN_pers cal_35 1063 -0.03495 -0.09252 0.02312 0.25493 -0.03489 -0.09183 0.02535 0.25578
28 eohiDGEN_pers cal_75 1063 -0.01611 -0.07516 0.04168 0.59971 -0.03379 -0.08928 0.02687 0.271
29 eohiDGEN_pers cal_false 1063 -0.02503 -0.08151 0.03334 0.4149 -0.03266 -0.08708 0.02624 0.28745
30 eohiDGEN_val cal_global 1063 -0.00447 -0.05803 0.05482 0.8843 -0.03204 -0.08825 0.0252 0.29666
31 eohiDGEN_mean cal_55 1063 -0.03699 -0.08943 0.01668 0.2282 -0.02929 -0.08473 0.03097 0.34006
32 eohiDGEN_pers cal_selfActual 1063 -0.03827 -0.10619 0.02517 0.21254 -0.02835 -0.09145 0.03191 0.35572
33 eohi_life cal_selfActual 1063 0.04737 -0.01652 0.10847 0.12274 0.02825 -0.03176 0.08666 0.35753
34 eohi_mean cal_75 1063 -0.01187 -0.06715 0.04855 0.69916 -0.0275 -0.0859 0.03279 0.37045
35 eohiDGEN_val cal_15 1063 0.00914 -0.04676 0.06618 0.76607 -0.02707 -0.08456 0.02837 0.37789
36 eohi_pref cal_55 1063 -0.02632 -0.08106 0.02643 0.39136 -0.02524 -0.08705 0.03558 0.41101
37 eohiDGEN_pref cal_35 1063 -0.00852 -0.0681 0.0469 0.78147 -0.02501 -0.08136 0.03028 0.41525
38 eohi_val cal_global 1063 -0.03245 -0.08628 0.02097 0.29049 -0.02445 -0.0837 0.03405 0.42589
39 eohiDGEN_pref cal_global 1063 0.05135 -0.00854 0.10781 0.09423 0.02438 -0.03698 0.07747 0.42716
40 eohi_mean cal_global 1063 -0.00987 -0.0669 0.04625 0.74782 -0.02423 -0.08093 0.03538 0.43002
41 eohi_mean cal_false 1063 0.00103 -0.05609 0.05796 0.97329 -0.02413 -0.08528 0.034 0.43184
42 eohi_pers cal_15 1063 0.0424 -0.01973 0.10006 0.16717 0.02377 -0.03499 0.07967 0.4388
43 eohiDGEN_pref cal_75 1063 0.04194 -0.01865 0.10462 0.17182 0.02253 -0.03687 0.08394 0.46315
44 eohi_val cal_35 1063 0.00919 -0.04488 0.06395 0.76478 -0.02132 -0.07724 0.0359 0.48748
45 eohiDGEN_mean cal_75 1063 0.02182 -0.03779 0.08058 0.47734 0.02086 -0.0387 0.08009 0.49684
46 eohi_val cal_15 1063 -0.0114 -0.06675 0.04342 0.71036 0.02045 -0.0384 0.08265 0.50533
47 eohiDGEN_mean cal_global 1063 0.01728 -0.04015 0.07341 0.5736 0.01974 -0.03874 0.07654 0.52029
48 eohiDGEN_val cal_true 1063 0.03211 -0.02651 0.08716 0.29554 0.01936 -0.04555 0.08179 0.52834
49 eohi_mean cal_15 1063 0.02437 -0.03634 0.08546 0.42732 0.01772 -0.03986 0.07648 0.56377
50 eohiDGEN_pers cal_55 1063 -0.02109 -0.07397 0.03478 0.49211 -0.0175 -0.0759 0.04546 0.56869
51 eohiDGEN_mean cal_selfActual 1063 0.00131 -0.05794 0.06413 0.96594 0.0169 -0.04525 0.07532 0.582
52 eohi_life cal_false 1063 0.02929 -0.02351 0.0794 0.34004 0.01689 -0.04339 0.07467 0.58238
53 eohiDGEN_mean cal_false 1063 -0.02032 -0.07402 0.03873 0.5082 -0.01576 -0.07583 0.04145 0.60773
54 eohiDGEN_pref cal_55 1063 -0.01025 -0.064 0.04642 0.73848 -0.01542 -0.0708 0.046 0.61559
55 eohi_life cal_35 1063 0.03599 -0.02203 0.08892 0.24108 0.01538 -0.04474 0.07414 0.61636
56 eohi_pers cal_false 1063 -0.00935 -0.06513 0.04644 0.76086 -0.01485 -0.07336 0.04336 0.62859
57 eohi_life cal_15 1063 0.0156 -0.0452 0.07483 0.61145 0.01432 -0.04315 0.07438 0.64092
58 eohi_pers cal_true 1063 0.0055 -0.05876 0.07464 0.85794 -0.01291 -0.06976 0.04539 0.67419
59 eohi_pref cal_75 1063 0.02725 -0.03153 0.08793 0.37474 0.01264 -0.0447 0.07576 0.68049
60 eohi_mean cal_true 1063 -0.01786 -0.07676 0.04265 0.56079 -0.01207 -0.07091 0.05063 0.69435
61 eohi_val cal_false 1063 -0.01276 -0.07529 0.04177 0.6777 -0.0115 -0.0741 0.04742 0.7081
62 eohi_pers cal_35 1063 0.01428 -0.04037 0.07111 0.6418 0.01092 -0.04486 0.06885 0.7222
63 eohiDGEN_pers cal_global 1063 -0.00723 -0.06312 0.05198 0.81387 -0.01039 -0.06483 0.05266 0.73501
64 eohi_mean cal_35 1063 0.02233 -0.03257 0.08046 0.46697 -0.01029 -0.06915 0.04512 0.73748
65 eohi_pref cal_global 1063 0.0183 -0.03841 0.07643 0.55118 0.00972 -0.0475 0.07007 0.75165
66 eohi_pers cal_global 1063 -0.00467 -0.06118 0.05112 0.87911 -0.00916 -0.06635 0.05143 0.76546
67 eohiDGEN_pref cal_selfActual 1063 0.02085 -0.03875 0.07863 0.49712 0.00839 -0.04939 0.06846 0.78474
68 eohi_val cal_true 1063 -0.0358 -0.0949 0.02654 0.24352 -0.0081 -0.07342 0.05105 0.79199
69 eohi_pref cal_15 1063 0.02312 -0.04369 0.0815 0.45148 0.00755 -0.05412 0.06986 0.80582
70 eohi_pref cal_false 1063 0.02725 -0.03088 0.08754 0.37477 0.00736 -0.05157 0.06691 0.8106
71 eohiDGEN_pref cal_false 1063 0.00873 -0.04608 0.05948 0.77614 0.00453 -0.05188 0.05691 0.88276
72 eohi_pref cal_35 1063 0.02663 -0.03409 0.08569 0.38574 0.00424 -0.05505 0.06505 0.89016
73 eohiDGEN_life cal_global 1063 0.02651 -0.03474 0.08515 0.38781 0.00414 -0.06008 0.06383 0.89275
74 eohiDGEN_val cal_35 1063 0.0047 -0.04819 0.06294 0.87827 -0.00387 -0.06182 0.05569 0.89972
75 eohi_pers cal_75 1063 -0.01501 -0.08009 0.05265 0.62493 -0.00292 -0.06324 0.06169 0.92424
76 eohi_life cal_global 1063 0.01135 -0.04298 0.06703 0.71158 -0.00291 -0.0638 0.0565 0.92459
77 eohiDGEN_life cal_35 1063 0.01256 -0.04397 0.071 0.68263 -0.00226 -0.06355 0.06168 0.94137
78 eohi_life cal_55 1063 0.0154 -0.0459 0.07265 0.61609 0.00223 -0.05962 0.05937 0.94216
79 eohiDGEN_mean cal_35 1063 -0.01765 -0.07148 0.0442 0.5654 -0.0016 -0.05665 0.05409 0.95856
80 eohiDGEN_val cal_75 1063 0.02365 -0.04062 0.08878 0.44107 -0.00147 -0.06785 0.06103 0.96186
81 eohi_pref cal_true 1063 -0.00826 -0.06273 0.05423 0.78791 0.00098 -0.05588 0.06502 0.97445

BIN
eohi1/eohi_process.xlsm Normal file

Binary file not shown.

1073
eohi1/exp1.csv Normal file

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,33 @@
variable,accuracy,TF,n_total,n_correct,prop_correct,match_difficulty,Table A1,Table A2,match_difficulty,,,,,,,
gest_75_T,75,T,1063,952,0.89558,NO,0.75,0.9,M,,,,,,,
dors_55_T,55,T,1063,949,0.89276,NO,0.64,0.96,M,,,,,,,
chur_75_T,75,T,1063,947,0.89087,NO,0.83,0.9,M,,,,,,,
mons_55_T,55,T,1063,944,0.88805,NO,0.55,0.82,H,,,,,,,
lock_35_T,35,T,1063,923,0.8683,NO,0.45,0.9,M,,,,,,,
hume_35_F,35,F,1063,873,0.82126,NO,0.43,0.94,M,,,,,,,
papy_75_T,75,T,1063,873,0.82126,YES,0.78,0.92,M,,,,,,,
sham_55_T,55,T,1063,860,0.80903,NO,0.57,1,M,,,,,,,0.665565
list_75_F,75,F,1063,855,0.80433,YES,0.85,0.96,L,,,,,,,
cons_55_T,55,T,1063,816,0.76764,NO,0.57,0.9,M,,,,,,,
tsun_75_T,75,T,1063,813,0.76482,YES,0.8,0.96,L,,,,,,,
pana_35_T,35,T,1063,805,0.75729,NO,0.37,0.65,H,,,,,,,
kabu_15_T,15,T,1063,791,0.74412,NO,0.25,0.76,M,,,,,,,
gulf_15_F,15,F,1063,773,0.72719,NO,0.16,0.24,H,,,,,,,
oedi_35_T,35,T,1063,767,0.72154,NO,0.43,0.88,M,,,,,,,
vaud_15_T,15,T,1063,766,0.7206,NO,0.24,0.41,H,,,,,,,
mont_35_F,35,F,1063,748,0.70367,NO,0.36,0.78,M,,,,,,,
demo_15_F,15,F,1063,727,0.68391,NO,0.19,0.39,H,,,,,,,
spee_75_F,75,F,1063,688,0.64722,NO,0.78,0.98,L,,,,,,,
dwar_55_F,55,F,1063,663,0.62371,YES,0.64,0.76,L,,,,,,,
carb_35_T,35,T,1063,629,0.59172,NO,0.39,0.31,H,,,,,,,
bohr_15_T,15,T,1063,626,0.5889,NO,0.16,0.37,H,,,,,,,
gang_15_F,15,F,1063,572,0.5381,NO,0.21,0.39,H,,,,,,,
vitc_55_F,55,F,1063,556,0.52305,NO,0.63,0.84,L,,,,,,,
hert_35_F,35,F,1063,551,0.51834,NO,0.41,0.65,M,,,,,,,
pucc_15_F,15,F,1063,543,0.51082,NO,0.15,0.22,H,,,,,,,
troy_15_T,15,T,1063,504,0.47413,NO,0.22,0.49,M,,,,,,,
moza_55_F,55,F,1063,500,0.47037,NO,0.59,0.71,L,,,,,,,
croc_75_F,75,F,1063,433,0.40734,NO,0.76,0.88,L,,,,,,,
gees_55_F,55,F,1063,324,0.3048,NO,0.57,0.84,L,,,,,,,
lute_35_F,35,F,1063,322,0.30292,NO,0.46,0.67,L,,,,,,,
memo_75_F,75,F,1063,265,0.24929,NO,0.83,0.88,L,,,,,,,
1 variable accuracy TF n_total n_correct prop_correct match_difficulty Table A1 Table A2 match_difficulty
1 variable accuracy TF n_total n_correct prop_correct match_difficulty Table A1 Table A2 match_difficulty
2 gest_75_T 75 T 1063 952 0.89558 NO 0.75 0.9 M
3 dors_55_T 55 T 1063 949 0.89276 NO 0.64 0.96 M
4 chur_75_T 75 T 1063 947 0.89087 NO 0.83 0.9 M
5 mons_55_T 55 T 1063 944 0.88805 NO 0.55 0.82 H
6 lock_35_T 35 T 1063 923 0.8683 NO 0.45 0.9 M
7 hume_35_F 35 F 1063 873 0.82126 NO 0.43 0.94 M
8 papy_75_T 75 T 1063 873 0.82126 YES 0.78 0.92 M
9 sham_55_T 55 T 1063 860 0.80903 NO 0.57 1 M 0.665565
10 list_75_F 75 F 1063 855 0.80433 YES 0.85 0.96 L
11 cons_55_T 55 T 1063 816 0.76764 NO 0.57 0.9 M
12 tsun_75_T 75 T 1063 813 0.76482 YES 0.8 0.96 L
13 pana_35_T 35 T 1063 805 0.75729 NO 0.37 0.65 H
14 kabu_15_T 15 T 1063 791 0.74412 NO 0.25 0.76 M
15 gulf_15_F 15 F 1063 773 0.72719 NO 0.16 0.24 H
16 oedi_35_T 35 T 1063 767 0.72154 NO 0.43 0.88 M
17 vaud_15_T 15 T 1063 766 0.7206 NO 0.24 0.41 H
18 mont_35_F 35 F 1063 748 0.70367 NO 0.36 0.78 M
19 demo_15_F 15 F 1063 727 0.68391 NO 0.19 0.39 H
20 spee_75_F 75 F 1063 688 0.64722 NO 0.78 0.98 L
21 dwar_55_F 55 F 1063 663 0.62371 YES 0.64 0.76 L
22 carb_35_T 35 T 1063 629 0.59172 NO 0.39 0.31 H
23 bohr_15_T 15 T 1063 626 0.5889 NO 0.16 0.37 H
24 gang_15_F 15 F 1063 572 0.5381 NO 0.21 0.39 H
25 vitc_55_F 55 F 1063 556 0.52305 NO 0.63 0.84 L
26 hert_35_F 35 F 1063 551 0.51834 NO 0.41 0.65 M
27 pucc_15_F 15 F 1063 543 0.51082 NO 0.15 0.22 H
28 troy_15_T 15 T 1063 504 0.47413 NO 0.22 0.49 M
29 moza_55_F 55 F 1063 500 0.47037 NO 0.59 0.71 L
30 croc_75_F 75 F 1063 433 0.40734 NO 0.76 0.88 L
31 gees_55_F 55 F 1063 324 0.3048 NO 0.57 0.84 L
32 lute_35_F 35 F 1063 322 0.30292 NO 0.46 0.67 L
33 memo_75_F 75 F 1063 265 0.24929 NO 0.83 0.88 L

Binary file not shown.

Before

Width:  |  Height:  |  Size: 19 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 20 KiB

After

Width:  |  Height:  |  Size: 20 KiB

BIN
eohi1/linearity_plots.pdf Normal file

Binary file not shown.

875
eohi1/mixed anova - DGEN.r Normal file
View File

@ -0,0 +1,875 @@
# Mixed ANOVA Analysis for DGEN Variables
# EOHI Experiment Data Analysis - DGEN Level Analysis
# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN
# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): hard-coded absolute path; the script only runs as-is on this
# one machine. A project-relative path would make it portable.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Verify the specific variables we need
required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
# NOTE(review): this only prints a warning and continues; if a column really
# is missing, the select(all_of(required_vars)) below will error anyway.
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
}
# Define domain mapping
# One row per measured column: maps each variable name to its TIME level
# (Past/Future) and its DOMAIN level (Preferences/Personality/Values/Life).
domain_mapping <- data.frame(
variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
# Efficient data pivoting using pivot_longer
# Reshape wide -> long: one row per participant x variable, value in DGEN_SCORE.
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "DGEN_SCORE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>%
filter(!is.na(DGEN_SCORE))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and DOMAIN
# (all statistics rounded to 5 decimal places for display)
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TIME and DOMAIN:")
print(desc_stats)
# Descriptive statistics by between-subjects factors
# Same summary, additionally split by the between-subjects factor TEMPORAL_DO.
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
# NOTE(review): long_data was already filtered to !is.na(DGEN_SCORE) above, so
# this is a no-op copy kept for safety.
long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
# 1. Missing values check
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(DGEN_SCORE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print("Missing values by TIME and DOMAIN:")
print(missing_summary)
# 2. Outlier detection
# Flags values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] per TIME x DOMAIN cell.
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(DGEN_SCORE),
sd = sd(DGEN_SCORE),
q1 = quantile(DGEN_SCORE, 0.25),
q3 = quantile(DGEN_SCORE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
.groups = 'drop'
)
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (streamlined)
# NOTE(review): ad.test() is evaluated twice per group (once for the statistic,
# once for the p-value); harmless but redundant work.
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
.groups = 'drop'
)
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each DOMAIN
# NOTE(review): leveneTest(DGEN_SCORE ~ TIME) inside summarise() relies on
# dplyr's data masking to resolve the formula variables; this works with the
# current dplyr/car versions but is fragile -- verify after package upgrades.
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across TIME within each DOMAIN:")
print(homogeneity_time)
# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print("Homogeneity of variance across DOMAIN within each TIME:")
print(homogeneity_domain)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Hartley's F-max statistic: the ratio of the largest to the smallest value in
# a vector of group variances. NAs are dropped before taking the extremes.
calculate_hartley_ratio <- function(variances) {
  extremes <- range(variances, na.rm = TRUE)
  extremes[2] / extremes[1]
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
# NOTE(review): the TEMPORAL_DO level labels ("01PAST", "02FUT") are
# hard-coded; if the factor levels ever change, these become NA silently.
past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# More efficient bootstrap function for Hartley's F-max test
#
# Computes the observed F-max ratio (max/min group variance) of `response_var`
# across the levels of `group_var`, then bootstraps within each group
# (resampling with replacement, preserving group sizes) to build a null
# distribution of F-max ratios and takes its 95th percentile as the critical
# value.
#
# Args:
#   data:         data frame/tibble containing group_var and response_var
#   group_var:    name (string) of the grouping column
#   response_var: name (string) of the numeric response column
#   n_iter:       number of bootstrap iterations (default 1000)
# Returns: list(observed_ratio, critical_95, n_valid_iterations)
# NOTE: result depends on RNG state; callers should set.seed() beforehand.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
# Get unique groups and their sample sizes
groups <- unique(data[[group_var]])
# Calculate observed variances for each group
observed_vars <- data %>%
dplyr::group_by(!!rlang::sym(group_var)) %>%
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
dplyr::pull(var)
# Handle invalid variances
# Zero/NA variances are replaced by a tiny positive constant so the max/min
# ratio stays finite instead of dividing by zero.
if(any(observed_vars <= 0 | is.na(observed_vars))) {
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
}
# Calculate observed F-max ratio
observed_ratio <- max(observed_vars) / min(observed_vars)
# Pre-allocate storage for bootstrap ratios
bootstrap_ratios <- numeric(n_iter)
# Get group data once
# Each element holds the non-NA response values of one group.
# NOTE(review): if `data` is a tibble the single-column subset below stays a
# tibble until the is.na() (matrix-style) filter collapses it to a vector --
# verify this still holds if the caller's input type changes.
group_data_list <- map(groups, ~ {
group_data <- data[data[[group_var]] == .x, response_var]
group_data[!is.na(group_data)]
})
# Bootstrap with pre-allocated storage
for(i in 1:n_iter) {
# Bootstrap sample from each group independently
sample_vars <- map_dbl(group_data_list, ~ {
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
var(bootstrap_sample, na.rm = TRUE)
})
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
}
# Remove invalid ratios
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
if(length(valid_ratios) == 0) {
stop("No valid bootstrap ratios generated")
}
# Calculate critical value (95th percentile)
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
# Return only essential information
return(list(
observed_ratio = observed_ratio,
critical_95 = critical_95,
n_valid_iterations = length(valid_ratios)
))
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
print("=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# pick() hands just the two needed columns of the current group to the
# bootstrap helper; the list() wrapper stores the result as a list-column.
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
.groups = 'drop'
) %>%
mutate(
# Unpack the list-column into plain numeric columns for display.
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check if design is balanced
# The 3-way table counts observations per pID x TIME x DOMAIN cell; a balanced
# repeated-measures design has exactly 0 or 1 per cell.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
# =============================================================================
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
# =============================================================================
print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 requests Type-III sums of squares (pairs with the contr.sum option
# set at the top of the script).
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = DGEN_SCORE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
# Show Mauchly's test for sphericity
print("Mauchly's Test of Sphericity:")
print(mixed_anova_model$Mauchly)
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
print(mixed_anova_model$`Sphericity Corrections`)
# Extract and display corrected degrees of freedom
cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
anova_table <- mixed_anova_model$ANOVA
# Corrected df = original df multiplied by the corresponding epsilon
# (GGe for Greenhouse-Geisser, HFe for Huynh-Feldt).
corrected_df <- data.frame(
Effect = sphericity_corr$Effect,
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
GG_epsilon = sphericity_corr$GGe,
HF_epsilon = sphericity_corr$HFe
)
print(corrected_df)
cat("\n=== CORRECTED F-TESTS ===\n")
# The F value itself is unchanged by the corrections; only the df and the
# p-value differ between the original, GG, and HF rows.
for(i in seq_len(nrow(corrected_df))) {
effect <- corrected_df$Effect[i]
f_value <- anova_table$F[match(effect, anova_table$Effect)]
cat(sprintf("\n%s:\n", effect))
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
}
} else {
# NOTE(review): print() renders "\n" as the literal characters backslash-n;
# cat() would emit a real newline (same gotcha for the other print("\n...")
# calls in this script).
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
}
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Extract generalized eta squared from ezANOVA (already calculated)
effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
effect_sizes$ges <- round(effect_sizes$ges, 5)
print("Generalized Eta Squared:")
print(effect_sizes)
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")
# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
# The Error(pID/(TIME * DOMAIN)) term re-specifies the same mixed design:
# TIME and DOMAIN vary within participant, TEMPORAL_DO between.
aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
data = long_data_clean)
# Main effect of TIME
print("Main Effect of TIME:")
time_emmeans <- emmeans(aov_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
# All pairwise contrasts below use a Bonferroni adjustment.
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of DOMAIN
print("\nMain Effect of DOMAIN:")
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print("Estimated Marginal Means:")
print(domain_emmeans)
print("\nPairwise Contrasts:")
domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
print(domain_contrasts)
# Main effect of TEMPORAL_DO
print("\nMain Effect of TEMPORAL_DO:")
temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
print(temporal_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# For each two-way interaction: estimated marginal means for the cell grid,
# then simple effects of one factor conditioned on (by=) the other.
# TEMPORAL_DO × TIME Interaction
print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
print("Estimated Marginal Means:")
print(temporal_time_emmeans)
print("\nSimple Effects of TIME within each TEMPORAL_DO:")
temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_time_simple)
print("\nSimple Effects of TEMPORAL_DO within each TIME:")
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
print(temporal_time_simple2)
# TIME × DOMAIN Interaction
print("\n=== TIME × DOMAIN INTERACTION ===")
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
print("Estimated Marginal Means:")
print(time_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TIME:")
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
print(time_domain_simple)
print("\nSimple Effects of TIME within each DOMAIN:")
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(time_domain_simple2)
# TEMPORAL_DO × DOMAIN Interaction
print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
print("Estimated Marginal Means:")
print(temporal_domain_emmeans)
print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
print(temporal_domain_simple)
print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
print(temporal_domain_simple2)
# =============================================================================
# THREE-WAY INTERACTION ANALYSIS
# =============================================================================
print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
print("Estimated Marginal Means:")
print(three_way_emmeans)
print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:")
three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
print(three_way_contrasts)
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d calculations (library already loaded)
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
#
# Filters an emmeans pairwise-contrast data frame to rows with p < .05, then,
# for each significant contrast, pulls the matching raw observations out of
# `data` and reports Cohen's d (effsize::cohen.d) with sample sizes and the
# contrast's p-value.
#
# Args:
#   pairs_df:     as.data.frame() of an emmeans pairs() result; must contain
#                 columns `contrast` and `p.value`
#   data:         raw long-format data frame
#   group1_var:   column whose levels appear in the contrast string
#   group2_var:   conditioning (by=) column, if present as a pairs_df column
#   response_var: numeric response column
# Side effects: prints to the console; no meaningful return value.
# NOTE(review): the contrast label is split on " - ", so parsing breaks if a
# factor level itself contains " - ".
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast
# e.g. "Past - Future" -> level1 = "Past", level2 = "Future"
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions
# If the conditioning column is present, subset within its level too.
if(group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
# =============================================================================
# 1. TEMPORAL_DO × TIME INTERACTION
# =============================================================================
print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
# Get simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE")
# Get simple effects of TEMPORAL_DO within each TIME
temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE")
# =============================================================================
# 2. TIME × DOMAIN INTERACTION
# =============================================================================
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Get simple effects of TIME within each DOMAIN
time_domain_simple2_df <- as.data.frame(time_domain_simple2)
calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
# Get simple effects of DOMAIN within each TIME
time_domain_simple_df <- as.data.frame(time_domain_simple)
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
# =============================================================================
# 3. TEMPORAL_DO × DOMAIN INTERACTION
# =============================================================================
print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
# Get simple effects of TEMPORAL_DO within each DOMAIN
temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
# Get simple effects of DOMAIN within each TEMPORAL_DO
temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
# =============================================================================
# 4. THREE-WAY INTERACTION COHEN'S D
# =============================================================================
# Unlike sections 1-3, this section inlines the Cohen's d logic because the
# three-way contrasts are conditioned on two factors at once.
print("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===")
# Get pairwise comparisons for the three-way interaction
three_way_contrasts_df <- as.data.frame(three_way_contrasts)
print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
print(three_way_contrasts_df)
# Calculate Cohen's d for significant three-way interaction effects
print("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:")
# Extract significant comparisons (p < 0.05)
significant_three_way <- three_way_contrasts_df[three_way_contrasts_df$p.value < 0.05, ]
if(nrow(significant_three_way) > 0) {
for(i in seq_len(nrow(significant_three_way))) {
comparison <- significant_three_way[i, ]
# Extract the grouping variables
temporal_do_level <- as.character(comparison$TEMPORAL_DO)
domain_level <- as.character(comparison$DOMAIN)
# Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
past_data <- long_data_clean$DGEN_SCORE[
long_data_clean$TEMPORAL_DO == temporal_do_level &
long_data_clean$DOMAIN == domain_level &
long_data_clean$TIME == "Past"
]
future_data <- long_data_clean$DGEN_SCORE[
long_data_clean$TEMPORAL_DO == temporal_do_level &
long_data_clean$DOMAIN == domain_level &
long_data_clean$TIME == "Future"
]
if(length(past_data) > 0 && length(future_data) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(past_data, future_data)
cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
cat(sprintf(" Past vs Future comparison\n"))
cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
cat("\n")
}
}
} else {
cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
}
# =============================================================================
# INTERACTION PLOTS
# =============================================================================
print("=== INTERACTION PLOTS ===")
# Define color palette for DOMAIN (4 levels)
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
"Values" = "#FFB000", "Life" = "#FE6100")
# TEMPORAL_DO × DOMAIN INTERACTION PLOT
# Create estimated marginal means for TEMPORAL_DO × DOMAIN
emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
# Prepare emmeans data frame
# (rename emmeans columns to plotting-friendly names and fix factor order)
emmeans_temporal_domain <- emm_temporal_domain %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
)
# Prepare raw data for plotting
iPlot_temporal_domain <- long_data_clean %>%
dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
)
# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
# Convert to numeric x-axis and add position offsets for dodging
# Manual dodging: each DOMAIN level (1..4) gets an offset of
# (level - 2.5) * dodge_width/4, i.e. the four violins are centered
# symmetrically around each TEMPORAL_DO x-position.
dodge_width <- 0.6
iPlot_temporal_domain <- iPlot_temporal_domain %>%
mutate(
x_pos = as.numeric(TEMPORAL_DO),
domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
x_dodged = x_pos + domain_offset
)
emmeans_temporal_domain <- emmeans_temporal_domain %>%
mutate(
x_pos = as.numeric(TEMPORAL_DO),
domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
x_dodged = x_pos + domain_offset
)
interaction_plot_temporal_domain <- ggplot() +
# Distribution layer - violins (completely separated)
# scale = "width" gives every violin the same maximum width.
geom_violin(
data = iPlot_temporal_domain,
aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)),
alpha = 0.4,
color = NA,
trim = FALSE,
scale = "width",
width = dodge_width / 4
) +
# Emmeans error bars
geom_errorbar(
data = emmeans_temporal_domain,
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
width = 0.08,
linewidth = 0.8,
color = "black"
) +
# Emmeans points
# Shapes 21-24 are fillable, so fill carries the domain color while the
# outline stays black.
geom_point(
data = emmeans_temporal_domain,
aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
size = 4,
stroke = 1,
color = "black"
) +
labs(
x = "Order",
y = "Mean absolute difference from present",
title = "TEMPORAL_DO × DOMAIN Interaction"
) +
scale_x_continuous(
breaks = c(1, 2),
labels = c("Past First", "Future First"),
limits = c(0.4, 2.6)
) +
scale_y_continuous(
limits = c(0, 10),
breaks = seq(0, 10, 2)
) +
scale_color_manual(name = "Domain", values = domain_colors) +
scale_fill_manual(name = "Domain", values = domain_colors) +
scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(interaction_plot_temporal_domain)
# =============================================================================
# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION
# =============================================================================
# Same cell means as the previous plot but without the raw-data violins, so a
# narrower y-range (3-6) is used below to zoom in on the means.
# Create fresh emmeans data for emmeans-only plot
emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
x_pos = as.numeric(TEMPORAL_DO),
domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
x_dodged = x_pos + domain_offset
)
# Create emmeans-only plot
interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) +
geom_errorbar(
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN),
width = 0.1,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Order",
y = "Absolute difference from the present",
title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)"
) +
scale_x_continuous(
breaks = c(1, 2),
labels = c("Past First", "Future First"),
limits = c(0.4, 2.6)
) +
scale_y_continuous(
limits = c(3, 6),
breaks = seq(0, 10, 1)
) +
scale_color_manual(name = "Domain", values = domain_colors) +
scale_fill_manual(name = "Domain", values = domain_colors) +
scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(interaction_plot_emmeans_only)
# =============================================================================
# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only)
# =============================================================================
# Prepare emmeans data frame for TIME main effect
# (reuses time_emmeans computed in the post-hoc section above)
time_main_emm_df <- time_emmeans %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TIME = factor(TIME, levels = c("Past", "Future"))
)
# Define color palette for TIME
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
# Create TIME main-effect plot (style aligned with existing emmeans-only plot)
time_main_plot <- ggplot(time_main_emm_df) +
geom_errorbar(
aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.15,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Time",
y = "Absolute difference from the present",
title = "Main Effect of TIME (Estimated Marginal Means)"
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(time_main_plot)

View File

@ -0,0 +1,769 @@
# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Dependent variables (8 columns = 4 domains x 2 temporal directions):
#   NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
#   NFut_mean_pref,  NFut_mean_pers,  NFut_mean_val,  NFut_mean_life
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
library(ggplot2) # For plotting
# Disable scientific notation in printed output
options(scipen = 999)
# Sum-to-zero contrasts: required for interpretable Type-III ANOVA tests below
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): hard-coded absolute path — will fail on any other machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping: each wide column name -> (time level, domain level)
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
# Reshape wide -> long: one row per participant x TIME x DOMAIN observation
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Overall descriptive statistics by TIME and DOMAIN (rounded to 5 dp for display)
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
# Descriptive statistics by between-subjects factors (presentation-order group)
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
# ASSUMPTION TESTING
# 1. Missing values check
# NOTE(review): long_data was already filtered with !is.na(MEAN_DIFFERENCE)
# above, so n_missing is 0 by construction here — this only documents counts.
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
# Create clean dataset (long_data is already filtered for NA values)
long_data_clean <- long_data
# 2. Outlier detection
# IQR rule: flag values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] per TIME x DOMAIN cell
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
# 3. Anderson-Darling normality test per TIME x DOMAIN cell.
# Fit ad.test() once per cell and keep the fitted object in a list-column
# (the original refit the same test twice to extract the statistic and the
# p-value separately). Output columns are unchanged.
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad_fit = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad_fit, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad_fit, ~ .x$p.value)
  ) %>%
  select(TIME, DOMAIN, n, ad_statistic, ad_p_value)
print(normality_results)
# 4. Homogeneity of variance (Levene's test), fitted once per group
# (the original refit each test twice to pull out F and p separately).
# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    lev_fit = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(lev_fit, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev_fit, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(DOMAIN, levene_F, levene_p)
print(homogeneity_time)
# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    lev_fit = list(leveneTest(MEAN_DIFFERENCE ~ DOMAIN)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(lev_fit, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev_fit, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(TIME, levene_F, levene_p)
print(homogeneity_domain)
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# Hartley's F-max statistic: ratio of the largest to the smallest variance.
# NAs in `variances` are ignored.
calculate_hartley_ratio <- function(variances) {
  spread <- range(variances, na.rm = TRUE)
  spread[2] / spread[1]
}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
# Sanity-check the factor coding before subsetting on its literal levels below
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
# NOTE(review): no na.rm here — if either group variance is NA (e.g. an
# empty cell), f_max_ratio becomes NA. Confirm that is acceptable.
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Bootstrap critical value for Hartley's F-max test.
#
# Args:
#   data:         data frame or tibble holding the grouping and response columns
#   group_var:    name of the grouping column (character scalar)
#   response_var: name of the numeric response column (character scalar)
#   n_iter:       number of bootstrap iterations (default 1000)
#
# Returns a list: observed_ratio (F-max of the observed group variances),
# critical_95 (95th percentile of the bootstrap F-max distribution), and
# n_valid_iterations (bootstrap draws that produced a finite ratio).
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group levels present in the data
  groups <- unique(data[[group_var]])
  # Observed per-group variances
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)
  # Replace zero/NA variances with a tiny positive value so the ratio stays finite
  if (any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }
  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)
  # Extract each group's response values once, as a plain numeric vector.
  # (`data[[col]][rows]` works for both data.frames and tibbles; the original
  # `data[rows, col]` returns a one-column tibble — not a vector — when the
  # caller passes a tibble such as the output of dplyr::pick().)
  group_data_list <- map(groups, ~ {
    vals <- data[[response_var]][data[[group_var]] == .x]
    vals[!is.na(vals)]
  })
  # Bootstrap: resample each group independently with replacement
  for (i in 1:n_iter) {
    sample_vars <- map_dbl(group_data_list, ~ {
      # Resample indices rather than values: sample(x, ...) on a length-1
      # numeric x would sample from 1:x instead of returning x itself.
      idx <- sample.int(length(.x), replace = TRUE)
      var(.x[idx], na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Keep only finite ratios (Inf/NaN can arise from degenerate resamples)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility
# For each cell, compare the observed F-max ratio against its bootstrap 95th
# percentile; pick() (dplyr >= 1.1) hands the within-group columns to the helper.
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
# Heterogeneity flagged when the observed ratio exceeds the bootstrap cutoff
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# MIXED ANOVA ANALYSIS
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if design is balanced: each participant must contribute exactly ONE
# observation to every TIME × DOMAIN cell. A count of 0 means a missing cell,
# >1 means duplicated rows. (The original accepted counts of 0 — i.e. missing
# cells — and still reported the design as balanced.)
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance == 1)) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 requests Type-III sums of squares (pairs with the contr.sum option
# set at the top of the script); detailed = TRUE adds SS columns to the table.
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
# Show Mauchly's test for sphericity
print(mixed_anova_model$Mauchly)
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt).
# ezANOVA only returns this element when a within-subjects effect has more
# than 2 levels, hence the NULL guard.
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  # Hoist the effect lookup: one index vector instead of eight match() calls
  eff_idx <- match(sphericity_corr$Effect, anova_table$Effect)
  # Corrected df = original df scaled by the epsilon estimate (GG / HF)
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[eff_idx],
    Original_DFd = anova_table$DFd[eff_idx],
    GG_DFn = anova_table$DFn[eff_idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[eff_idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[eff_idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[eff_idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]
      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }
  # Within-subjects effects (sphericity corrections where applicable)
  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }
  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }
  # Effects needing correction: report original, GG- and HF-corrected statistics
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[eff_idx[i]]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
      corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
      corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
      corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # cat() renders "\n" as a newline; the original print() showed it literally
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
# COHEN'S D FOR MAIN EFFECTS
# Create aov model for emmeans; Error(pID/(TIME * DOMAIN)) declares the
# within-subject error strata matching the ezANOVA design above
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
data = long_data_clean)
# Main Effect of TIME: estimated marginal means and the Past - Future contrast
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Calculate Cohen's d for TIME main effect
# NOTE(review): cohen.d() here treats the Past and Future scores as two
# independent samples, but they come from the same participants — consider
# paired = TRUE (on participant-level means) for a within-subject effect size.
if(nrow(time_main_df) > 0) {
cat("\nCohen's d for TIME main effect:\n")
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
time_cohens_d <- cohen.d(time_past_data, time_future_data)
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
# Bonferroni-adjusted pairwise contrasts among the four domains
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
# (levels are recovered by splitting the emmeans contrast label on " - ")
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
cat("\nCohen's d for significant DOMAIN contrasts:\n")
for(i in seq_len(nrow(significant_domain))) {
contrast_name <- as.character(significant_domain$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
if(length(data1) > 0 && length(data2) > 0) {
# NOTE(review): unpaired cohen.d on repeated-measures data — see TIME note
domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name))
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
cat("\n")
}
}
}
}
# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS
# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO
# ("|" conditions the contrasts: Past vs Future separately per presentation order)
temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni")
print(temporal_time_simple)
# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME
time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME)
time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni")
print(time_domain_simple)
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# Function to calculate Cohen's d for pairwise comparisons
# Report Cohen's d (from raw data) for all significant rows of an emmeans
# pairwise-contrast table.
#
# pairs_df:     data.frame from pairs()/emmeans with columns `contrast` and
#               `p.value` (plus, for conditioned contrasts, a column named by
#               group2_var holding the conditioning level)
# data:         raw long-format data the contrasts were computed from
# group1_var:   column whose levels appear in the contrast strings ("A - B")
# group2_var:   conditioning column (used only if present in pairs_df)
# response_var: numeric response column
#
# Side effects only: prints the significant rows and a Cohen's d summary.
# NOTE(review): levels are parsed by splitting the contrast label on " - ";
# this breaks if a factor level itself contains " - ".
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast label into its two factor levels
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions, restricted to the conditioning level
# when this is a conditioned ("|") contrast table
if(group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package (unpaired)
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
# 1. TEMPORAL_DO × TIME INTERACTION
# Get simple effects of TIME within each TEMPORAL_DO
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# 2. TIME × DOMAIN INTERACTION
# Get simple effects of TIME within each DOMAIN (Past vs Future contrasts for each domain)
time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain)
print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===")
print("Past vs Future contrasts for each domain:")
print(time_domain_simple_df)
# Calculate Cohen's d for Past vs Future contrasts within each domain.
# cat() renders the leading "\n" as a newline; the original print() calls
# showed the "\n" literally in the console.
cat("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===\n")
significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ]
if(nrow(significant_time_domain) > 0) {
  print("Significant Past vs Future contrasts within domains (p < 0.05):")
  print(significant_time_domain)
  cat("\nCohen's d calculations for Past vs Future within each domain:\n")
  # NOTE(review): this loop reports ALL domains (it iterates over
  # time_domain_simple_df), not only the significant ones listed above —
  # confirm that is intentional.
  for(i in seq_len(nrow(time_domain_simple_df))) {
    comparison <- time_domain_simple_df[i, ]
    domain_level <- as.character(comparison$DOMAIN)
    # Get data for Past and Future within this domain
    past_data <- long_data_clean$MEAN_DIFFERENCE[
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Past"
    ]
    future_data <- long_data_clean$MEAN_DIFFERENCE[
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Future"
    ]
    if(length(past_data) > 0 && length(future_data) > 0) {
      # Calculate Cohen's d using effsize package (unpaired)
      cohens_d_result <- cohen.d(past_data, future_data)
      cat(sprintf("Domain: %s\n", domain_level))
      cat(sprintf(" Past vs Future comparison\n"))
      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
      cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
      cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
      cat(sprintf(" p-value: %.5f\n", comparison$p.value))
      cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
      cat("\n")
    }
  }
} else {
  cat("No significant Past vs Future contrasts found within any domain.\n")
}
# INTERACTION PLOTS
# Define color palettes (colour-blind-friendly IBM palette)
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")
# NOTE(review): domain_colors is defined but not referenced by any plot in
# this section — confirm it is used later or remove it.
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
"Values" = "#FFB000", "Life" = "#FE6100")
# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")
# ============================================================
# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
# ============================================================
# Create fresh emmeans data for Plot 3
emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
# Keep rows with complete estimate + CI; compute a manual dodge so the two
# TIME points sit side-by-side at each TEMPORAL_DO position (offset ±0.15)
emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
TIME = factor(TIME, levels = time_levels),
x_pos = as.numeric(TEMPORAL_DO),
time_offset = (as.numeric(TIME) - 1.5) * 0.3,
x_dodged = x_pos + time_offset
)
# Create simple emmeans-only plot (error bars under the points)
interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) +
geom_errorbar(
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.1,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Order",
y = "Mean absolute difference from present",
title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
) +
scale_x_continuous(
breaks = c(1, 2),
labels = c("Past First", "Future First"),
limits = c(0.5, 2.5)
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(interaction_plot_emmeans_only)
# ============================================================
# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis)
# ============================================================
# Create estimated marginal means for TIME × DOMAIN (reusing existing emmeans)
emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN)
# Prepare emmeans data frame for Plot 4 (complete estimate + CI rows only)
emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels)
)
# Prepare raw data for plotting with position offsets
# NOTE(review): iPlot_plot4 is built but never used by the emmeans-only plot
# below — confirm it is needed (e.g. for a raw-points overlay) or drop it.
dodge_width_plot4 <- 0.2
iPlot_plot4 <- long_data_clean %>%
dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels),
x_pos = as.numeric(DOMAIN),
time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
x_dodged = x_pos + time_offset
)
# Manual dodge: offset the two TIME points around each DOMAIN position
emmeans_time_domain_plot4 <- emmeans_time_domain_plot4 %>%
mutate(
x_pos = as.numeric(DOMAIN),
time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
x_dodged = x_pos + time_offset
)
# Create TIME × DOMAIN interaction plot (Domain on x-axis, TIME as groups) - EMMeans only
interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) +
geom_errorbar(
aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.1,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Domain",
y = "Mean absolute difference from present",
title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means"
) +
scale_x_continuous(
breaks = c(1, 2, 3, 4),
labels = c("Preferences", "Personality", "Values", "Life"),
limits = c(0.5, 4.5)
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(interaction_plot_time_domain_plot4)
# ============================================================
# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only)
# ============================================================
# Prepare emmeans data frame for TIME main effect (complete estimate + CI only)
time_main_emm_df <- time_emmeans %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
TIME = factor(TIME, levels = c("Past", "Future"))
)
# Create TIME main-effect plot (style aligned with existing emmeans-only plots;
# error bars drawn first so the emmeans points sit on top)
time_main_plot <- ggplot(time_main_emm_df) +
geom_errorbar(
aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
width = 0.15,
linewidth = 1,
alpha = 0.8
) +
geom_point(
aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
size = 5,
stroke = 1.2,
color = "black"
) +
labs(
x = "Time",
y = "Mean absolute difference from present",
title = "Main Effect of TIME (Estimated Marginal Means)"
) +
scale_color_manual(name = "Temporal Direction", values = time_colors) +
scale_fill_manual(name = "Temporal Direction", values = time_colors) +
scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
theme_minimal(base_size = 13) +
theme(
axis.text = element_text(size = 11),
axis.title = element_text(size = 12),
plot.title = element_text(size = 14, hjust = 0.5),
legend.position = "right",
legend.title = element_text(size = 11),
legend.text = element_text(size = 10),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
)
print(time_main_plot)

View File

@ -0,0 +1,765 @@
# Mixed ANOVA Analysis for Personality Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Dependent variables (10 columns = 5 personality items x 2 temporal directions):
#   NPastDiff_pers_{extravert,critical,dependable,anxious,complex}
#   NFutDiff_pers_{extravert,critical,dependable,anxious,complex}
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): hard-coded absolute path — will fail on any other machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Display basic information about the dataset.
# collapse = " x " (with spaces) so dimensions print as "378 x 10",
# not "378 x10" as the original " x" separator produced.
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$pID))))
# Verify the specific variables we need
required_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
"NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
print("All required personality item variables found!")
}
# Define item mapping: each wide column name -> (time level, item level)
item_mapping <- data.frame(
variable = c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
"NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex"),
time = c(rep("Past", 5), rep("Future", 5)),
item = rep(c("extravert", "critical", "dependable", "anxious", "complex"), 2),
stringsAsFactors = FALSE
)
# Item mapping created
# Efficient data pivoting using pivot_longer: one row per participant x TIME x ITEM
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(item_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# collapse = " x " so dimensions print as "3780 x 6", not "3780 x6"
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
print(paste("Number of participants:", length(unique(long_data$pID))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and ITEM (rounded to 5 dp for display)
desc_stats <- long_data %>%
group_by(TIME, ITEM) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)
# Descriptive statistics by between-subjects factors (presentation-order group)
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, ITEM) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing.
# NOTE(review): long_data was already filtered with !is.na(MEAN_DIFFERENCE)
# above, so this is a harmless no-op kept for safety.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
# collapse = " x " so dimensions print as "3780 x 6", not "3780 x6"
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
# 1. Missing values check per TIME x ITEM cell
missing_summary <- long_data %>%
group_by(TIME, ITEM) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print("Missing values by TIME and ITEM:")
print(missing_summary)
# 2. Outlier detection
# IQR rule: flag values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] per TIME x ITEM cell
outlier_summary <- long_data_clean %>%
group_by(TIME, ITEM) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test per TIME x ITEM cell.
# Fit ad.test() once per cell and keep the fitted object in a list-column
# (the original refit the same test twice to extract the statistic and the
# p-value separately). Output columns are unchanged.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_fit = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad_fit, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad_fit, ~ .x$p.value)
  ) %>%
  select(TIME, ITEM, n, ad_statistic, ad_p_value)
print("Anderson-Darling normality test results:")
# Round only the numeric columns for display
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test), fitted once per group
# (the original refit each test twice to pull out F and p separately).
# Test homogeneity across TIME within each ITEM
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    lev_fit = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(lev_fit, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev_fit, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(ITEM, levene_F, levene_p)
print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)
# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    lev_fit = list(leveneTest(MEAN_DIFFERENCE ~ ITEM)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(lev_fit, ~ .x$`F value`[1]),
    levene_p = map_dbl(lev_fit, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(TIME, levene_F, levene_p)
print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Function to calculate Hartley's F-max ratio
# Hartley's F-max statistic: the largest group variance divided by the
# smallest, ignoring missing variances.
calculate_hartley_ratio <- function(variances) {
  spread <- range(variances, na.rm = TRUE)
  spread[2] / spread[1]
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × ITEM)
# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))
# NOTE(review): print() shows the characters "\n" literally; use cat() if an
# actual blank line before the banner is intended (pattern recurs below).
print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
# Hard-codes the two TEMPORAL_DO level labels "01PAST"/"02FUT", which the
# value check printed just above is meant to verify.
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Bootstrap critical value for Hartley's F-max homogeneity-of-variance test.
#
# Args:
#   data         data.frame or tibble with one grouping column and one numeric
#                response column (e.g. the result of dplyr::pick()).
#   group_var    column name (string) of the grouping factor.
#   response_var column name (string) of the numeric response.
#   n_iter       number of bootstrap resamples (default 1000).
#
# Returns a list: observed_ratio (max/min of the group variances), critical_95
# (95th percentile of the bootstrap F-max distribution, named as quantile()
# names it), n_valid_iterations (resamples that yielded a finite ratio).
#
# Changes vs. the original:
# * Columns are extracted with [[ ]], which always yields a numeric vector.
#   The original used data[rows, response_var], which on a tibble keeps a
#   one-column tibble, so sample()/var() depended on tibble subsetting quirks.
# * dplyr/purrr helpers replaced with base R (split/vapply/lapply), removing
#   all package dependencies from this function; results are unchanged.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  response_col <- data[[response_var]]
  group_col <- data[[group_var]]
  # Per-group observations (drop = TRUE skips empty factor levels).
  group_values <- split(response_col, group_col, drop = TRUE)
  # Observed per-group variances.
  observed_vars <- vapply(group_values, var, numeric(1), na.rm = TRUE)
  # Clamp zero/NA variances so the observed ratio stays finite.
  bad <- is.na(observed_vars) | observed_vars <= 0
  observed_vars[bad] <- 1e-10
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Cache each group's non-missing values once before resampling.
  group_data_list <- lapply(group_values, function(vals) vals[!is.na(vals)])
  # Pre-allocated storage for the bootstrap F-max distribution.
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    # Resample each group independently (same n, with replacement).
    sample_vars <- vapply(group_data_list, function(vals) {
      var(sample(vals, size = length(vals), replace = TRUE))
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Drop Inf/NaN ratios produced by degenerate resamples.
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # 95th percentile of the bootstrap distribution is the critical value.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  )
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
# One bootstrap per TIME × ITEM cell; each result list is carried in a
# list-column and then unpacked into numeric columns.
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Heterogeneity flagged when the observed ratio exceeds the bootstrap cutoff.
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check for missing data patterns
# NOTE(review): the next two table()/xtabs() results are neither assigned nor
# printed; when this file is source()d they produce no visible output.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
# Check data balance
xtabs(~ pID + TIME + ITEM, data = long_data_clean)
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
print(summary(as.vector(design_balance)))
# Check for any participants with missing combinations
# Expect 10 rows per participant: 2 TIME × 5 ITEM cells.
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )
print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
# Error(pID/(TIME * ITEM)) declares the within-subject error strata.
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
                         data = long_data_clean)
print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))
# Alternative: Using afex::aov_ez for cleaner output (optional)
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
                           dv = "MEAN_DIFFERENCE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "ITEM"))
print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Sphericity tests using ezANOVA (library already loaded)
print("\n=== SPHERICITY TESTS ===")
# Test sphericity for ITEM (5 levels - within-subjects)
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))
  # Run ezANOVA to get sphericity tests
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)
  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)
}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})
# Test sphericity for TIME (2 levels - within-subjects)
# NOTE(review): with only 2 levels sphericity is trivially satisfied, so
# Mauchly's test is presumably not produced for TIME — confirm.
# NOTE(review): temp_data is created inside the previous tryCatch; the blocks
# below fail if that block errored before defining it.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)
  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})
# Test sphericity for TIME × ITEM interaction
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)
  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
# Get corrected results from ezANOVA
# NOTE(review): this call omits the between-subjects factor TEMPORAL_DO, so it
# fits a within-subjects-only ANOVA — confirm that is intended.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)
print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)
# Show epsilon values for sphericity corrections
# NOTE(review): this actually prints the Mauchly table; the epsilons live in
# ez_corrected$`Sphericity Corrections`, handled below.
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)
# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
# Check if sphericity corrections are available
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
  # Get the sphericity corrections
  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  # Extract original degrees of freedom from ANOVA table
  anova_table <- ez_corrected$ANOVA
  # Calculate corrected degrees of freedom
  # Corrected df = original df scaled by that effect's GG/HF epsilon; match()
  # aligns each corrected effect with its row of the ANOVA table.
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Also show the corrected F-values and p-values with degrees of freedom
  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for(i in 1:nrow(corrected_df)) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    # NOTE(review): %d errors on non-integral doubles in R's sprintf; fine
    # here only while the original dfs are whole numbers.
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================
print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
# Create a wide-format data for car package (library already loaded)
tryCatch({
  # Convert to wide format for car package
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")
  # Create the repeated measures design
  # NOTE(review): BUG — these names come from a DOMAIN-level analysis
  # (Preferences/Personality/Values/Life). pivot_wider() above produces
  # "Past_read" ... "Future_travel" (10 columns), so every name below is
  # reported missing and the epsilon branch never executes. TODO fix.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
  # Check if all columns exist
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Create the repeated measures design
    rm_design <- as.matrix(wide_data[, within_vars])
    # Calculate epsilon values
    # NOTE(review): car does not appear to export an epsilon() function —
    # GG/HF epsilons normally come from summary(car::Anova(mlm), multivariate
    # = FALSE). If this branch ever runs it will likely hit the error handler
    # below. TODO verify/replace.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
    # Interpretation
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }
    # =============================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =============================================================================
    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
    # ITEM main effect (DFn = 4, DFd = 4244)
    # NOTE(review): hard-coded dfs (4, 4244) must match the aov table printed
    # earlier; update by hand if the sample changes.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf
    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf
    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }
}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
# Effect size calculations (library already loaded)
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
  # Extract effect sizes from aov model
  # NOTE(review): round() on a data frame errors if it holds non-numeric
  # columns (effectsize tables include a Parameter column); the tryCatch
  # masks that. TODO round numeric columns only.
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  print(round(aov_effects, 5))
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})
# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA provides partial eta squared, convert to generalized
  ez_effects <- ez_corrected$ANOVA
  # NOTE(review): the next line is a no-op self-assignment; ezANOVA's "ges"
  # column is already generalized eta squared.
  ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
  print("Generalized Eta Squared from ezANOVA:")
  print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})
# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
# NOTE(review): anova_summary is computed but never used after this point.
anova_summary <- summary(mixed_anova_model)
# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")
# Main effect of TIME
# Marginal means for TIME, with Bonferroni-adjusted pairwise contrasts.
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of ITEM
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# TIME × ITEM Interaction
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)
# Simple effects: pairwise contrasts within each level of the `by` factor;
# Bonferroni adjustment is applied within each `by` group.
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d calculations (library already loaded)
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
#
# For every row of `pairs_df` with p < .05, parses the "A - B" contrast label,
# pulls the matching raw observations from `data`, and prints the
# effsize::cohen.d estimate plus its magnitude label.
#
# Args:
#   pairs_df: data.frame from as.data.frame(pairs(emmeans_obj, ...)); needs
#     columns `contrast` and `p.value`, optionally a `by`-factor column whose
#     name is group2_var.
#   data: long-format data frame holding the raw observations.
#   group1_var: column whose levels appear in the contrast labels.
#   group2_var: the conditioning (`by`) column name, if any.
#   response_var: numeric response column name.
#
# Side effects only (cat/print); no return value of interest.
# NOTE(review): splitting on " - " breaks silently if a factor level itself
# contains that substring — the row is skipped without warning.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)
    cat("\nCohen's d calculated from raw data:\n")
    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)
      # Parse the contrast
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])
        # Get raw data for both conditions
        # When pairs were computed with a `by` factor, restrict both samples
        # to that factor's level for this row.
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]
          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)
          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): header says DOMAIN and cites p = 0.012, but the code analyzes
# the TIME × ITEM contrasts built above — header looks copy-pasted from the
# domain-level script; the cited p-value cannot be verified here.
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Get simple effects of TIME within each ITEM
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
# Get simple effects of ITEM within each TIME
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")

View File

@ -0,0 +1,765 @@
# Mixed ANOVA Analysis for Preference Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
#            NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): absolute user path hurts portability — prefer a
# project-relative path (e.g. here::here()).
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Report dataset dimensions; collapse with " x " (trailing space) so the
# output reads "rows x cols" — the original " x" printed e.g. "425 x60".
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$pID))))
# Verify the specific variables we need
required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
                   "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required preference item variables found!")
}
# Define item mapping
# Maps each wide-format column name to its TIME (Past/Future) and ITEM level.
item_mapping <- data.frame(
  variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
               "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"),
  time = c(rep("Past", 5), rep("Future", 5)),
  item = rep(c("read", "music", "tv", "nap", "travel"), 2),
  stringsAsFactors = FALSE
)
# Item mapping created
# Efficient data pivoting using pivot_longer
# Wide -> long: one row per participant × TIME × ITEM, response held in
# MEAN_DIFFERENCE; rows with a missing response are dropped at the end.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(item_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
# Report long-format dimensions; " x " (with trailing space) renders "N x M"
# correctly — the original " x" printed e.g. "4250 x6".
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x ")))
print(paste("Number of participants:", length(unique(long_data$pID))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and ITEM
# Cell-level summary (n, central tendency, spread, quartiles, range), rounded
# to 5 decimals for display.
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)
# Descriptive statistics by between-subjects factors
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
# NOTE(review): redundant — the pivot pipeline above already filtered NAs, so
# long_data_clean equals long_data; kept as a defensive guard.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
# Report post-filter dimensions; " x " (with trailing space) renders "N x M"
# correctly — the original " x" printed e.g. "4250 x6".
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x ")))
# 1. Missing values check
# Per-cell (TIME x ITEM) count and percentage of missing responses.
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )
print("Missing values by TIME and ITEM:")
print(missing_summary)
# 2. Outlier detection
# Tukey fences: observations beyond Q1 - 1.5*IQR or Q3 + 1.5*IQR are counted
# as outliers (counted only, not removed).
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (streamlined)
# NOTE(review): ad.test() runs twice per cell (statistic, then p-value);
# nortest::ad.test needs n >= 8 per cell — TODO confirm all cells qualify.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each ITEM
# NOTE(review): leveneTest(formula) inside summarise() relies on data-masked
# columns being visible to the formula, and runs twice per group.
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)
# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Function to calculate Hartley's F-max ratio
# Hartley's F-max statistic: the largest group variance divided by the
# smallest, ignoring missing variances.
calculate_hartley_ratio <- function(variances) {
  spread <- range(variances, na.rm = TRUE)
  spread[2] / spread[1]
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × ITEM)
# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))
# NOTE(review): print() shows the characters "\n" literally; use cat() if an
# actual blank line before the banner is intended (pattern recurs below).
print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
# Hard-codes the two TEMPORAL_DO level labels "01PAST"/"02FUT", which the
# value check printed just above is meant to verify.
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Bootstrap critical value for Hartley's F-max homogeneity-of-variance test.
#
# Args:
#   data         data.frame or tibble with one grouping column and one numeric
#                response column (e.g. the result of dplyr::pick()).
#   group_var    column name (string) of the grouping factor.
#   response_var column name (string) of the numeric response.
#   n_iter       number of bootstrap resamples (default 1000).
#
# Returns a list: observed_ratio (max/min of the group variances), critical_95
# (95th percentile of the bootstrap F-max distribution, named as quantile()
# names it), n_valid_iterations (resamples that yielded a finite ratio).
#
# Changes vs. the original:
# * Columns are extracted with [[ ]], which always yields a numeric vector.
#   The original used data[rows, response_var], which on a tibble keeps a
#   one-column tibble, so sample()/var() depended on tibble subsetting quirks.
# * dplyr/purrr helpers replaced with base R (split/vapply/lapply), removing
#   all package dependencies from this function; results are unchanged.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  response_col <- data[[response_var]]
  group_col <- data[[group_var]]
  # Per-group observations (drop = TRUE skips empty factor levels).
  group_values <- split(response_col, group_col, drop = TRUE)
  # Observed per-group variances.
  observed_vars <- vapply(group_values, var, numeric(1), na.rm = TRUE)
  # Clamp zero/NA variances so the observed ratio stays finite.
  bad <- is.na(observed_vars) | observed_vars <= 0
  observed_vars[bad] <- 1e-10
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Cache each group's non-missing values once before resampling.
  group_data_list <- lapply(group_values, function(vals) vals[!is.na(vals)])
  # Pre-allocated storage for the bootstrap F-max distribution.
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    # Resample each group independently (same n, with replacement).
    sample_vars <- vapply(group_data_list, function(vals) {
      var(sample(vals, size = length(vals), replace = TRUE))
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Drop Inf/NaN ratios produced by degenerate resamples.
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # 95th percentile of the bootstrap distribution is the critical value.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  )
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
# One bootstrap per TIME × ITEM cell; each result list is carried in a
# list-column and then unpacked into numeric columns.
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Heterogeneity flagged when the observed ratio exceeds the bootstrap cutoff.
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check for missing data patterns
# NOTE(review): the next two table()/xtabs() results are neither assigned nor
# printed; when this file is source()d they produce no visible output.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
# Check data balance
xtabs(~ pID + TIME + ITEM, data = long_data_clean)
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
print(summary(as.vector(design_balance)))
# Check for any participants with missing combinations
# Expect 10 rows per participant: 2 TIME × 5 ITEM cells.
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )
print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
# Error(pID/(TIME * ITEM)) declares the within-subject error strata.
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
                         data = long_data_clean)
print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))
# Alternative: Using afex::aov_ez for cleaner output (optional)
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
                           dv = "MEAN_DIFFERENCE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "ITEM"))
print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Sphericity tests using ezANOVA (library already loaded)
print("\n=== SPHERICITY TESTS ===")
# Test sphericity for ITEM (5 levels - within-subjects)
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))
  # Run ezANOVA to get sphericity tests
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)
  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)
}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})
# Test sphericity for TIME (2 levels - within-subjects)
# NOTE(review): with only 2 levels sphericity is trivially satisfied, so
# Mauchly's test is presumably not produced for TIME — confirm.
# NOTE(review): temp_data is created inside the previous tryCatch; the blocks
# below fail if that block errored before defining it.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)
  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})
# Test sphericity for TIME × ITEM interaction
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)
  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
# Get corrected results from ezANOVA.
# NOTE(review): this call also omits between = TEMPORAL_DO, so the table below
# covers the within-subjects effects only — verify this matches the reporting.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)
print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)
# Show epsilon values for sphericity corrections
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)
# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
# Check if sphericity corrections are available
# (ezANOVA only includes this element when a within factor has > 2 levels)
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
  # Get the sphericity corrections
  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  # Extract original degrees of freedom from ANOVA table
  anova_table <- ez_corrected$ANOVA
  # Calculate corrected degrees of freedom: corrected df = original df × epsilon.
  # match() aligns each corrected effect with its row in the ANOVA table.
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Also show the corrected F-values and p-values with degrees of freedom.
  # The F statistic itself is unchanged by the corrections; only the df used
  # for the p-value change (p[GG] / p[HF] come from ezANOVA).
  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for(i in 1:nrow(corrected_df)) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================
print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
# Create a wide-format data for car package (library already loaded)
tryCatch({
  # Convert to wide format: one column per TIME_ITEM cell, one row per subject
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")
  # Create the repeated measures design
  # NOTE(review): only 8 columns are listed here (4 domains × 2 times) while
  # the design above has 5 ITEM levels (10 cells), and these labels
  # (Preferences/Personality/Values/Life) do not match the ITEM factor labels —
  # this looks copied from a different analysis; verify before trusting results.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
  # Check if all columns exist (guards against the mismatch noted above)
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Create the repeated measures design
    rm_design <- as.matrix(wide_data[, within_vars])
    # Calculate epsilon values
    # NOTE(review): `epsilon(..., type = ...)` is not an exported function of
    # current car releases — confirm which package/helper provides it.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
    # Interpretation (conventional 0.75 rule of thumb)
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }
    # =============================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =============================================================================
    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
    # ITEM main effect (DFn = 4, DFd = 4244)
    # NOTE(review): 4 and 4244 are hard-coded for one particular dataset size —
    # they will be wrong if the sample changes; verify against the ANOVA table.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf
    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf
    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }
}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
# Effect size calculations (library already loaded)
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
  # Extract effect sizes from aov model (effectsize::eta_squared)
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  print(round(aov_effects, 5))
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})
# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA's ANOVA table already carries generalized eta squared in `ges`
  ez_effects <- ez_corrected$ANOVA
  # NOTE(review): the next line is a no-op self-assignment kept from a draft
  ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
  print("Generalized Eta Squared from ezANOVA:")
  print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})
# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
anova_summary <- summary(mixed_anova_model)
# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Post-hoc comparisons using emmeans (marginal means + Bonferroni-adjusted pairs)
print("\n=== POST-HOC COMPARISONS ===")
# Main effect of TIME
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of ITEM
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# TIME × ITEM Interaction: cell means plus simple-effects contrasts in both
# directions (ITEM within TIME, and TIME within ITEM).
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d calculations (library already loaded)
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
# Print Cohen's d (computed from the raw data) for every significant pairwise
# contrast in an emmeans `pairs()` result.
#
# Args:
#   pairs_df:     data.frame of pairwise contrasts; must have `contrast` and
#                 `p.value` columns, and may carry a conditioning column named
#                 after `group2_var` when contrasts were computed with `by=`.
#   data:         long-format raw data to pull each group's observations from.
#   group1_var:   name of the factor whose levels appear in `contrast`.
#   group2_var:   name of the conditioning ("by") factor, if any.
#   response_var: name of the numeric response column in `data`.
#
# Side effects: prints to the console only; no meaningful return value.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  # Keep only contrasts significant at the (already adjusted) .05 level
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)
    cat("\nCohen's d calculated from raw data:\n")
    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)
      # Parse the contrast: emmeans labels look like "levelA - levelB".
      # NOTE(review): assumes no factor level itself contains " - " — verify.
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])
        # Get raw data for both conditions; restrict to the conditioning
        # level when the contrasts were computed within a `by` factor.
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]
          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package (pooled-SD estimate)
          cohens_d_result <- cohen.d(data1, data2)
          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): the p = 0.012 in the banner is a hard-coded result from a
# previous run — confirm it still matches the current ANOVA output.
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Get simple effects of TIME within each ITEM (contrasts were `by = "ITEM"`)
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
# Get simple effects of ITEM within each TIME (contrasts were `by = "TIME"`)
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")

View File

@ -0,0 +1,765 @@
# Mixed ANOVA Analysis for Values Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Variables: NPastDiff_val_obey, NPastDiff_val_trad, NPastDiff_val_opinion, NPastDiff_val_performance, NPastDiff_val_justice
# NFutDiff_val_obey, NFutDiff_val_trad, NFutDiff_val_opinion, NFutDiff_val_performance, NFutDiff_val_justice
# Load required libraries
library(tidyverse)
library(ez)
library(car)
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations
# Global options to remove scientific notation
options(scipen = 999)
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): machine-specific absolute path; breaks on any other machine —
# consider here::here() or running from the project root instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the data
data <- read.csv("exp1.csv")
# Display basic information about the dataset
# NOTE: collapse = " x" has no trailing space, so output reads e.g. "100 x12"
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))
# Verify the specific variables we need (fail loudly if columns were renamed)
required_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
                   "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice")
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required values item variables found!")
}
# Define item mapping: wide column name -> (time, item) pair used when pivoting
item_mapping <- data.frame(
  variable = c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
               "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice"),
  time = c(rep("Past", 5), rep("Future", 5)),
  item = rep(c("obey", "trad", "opinion", "performance", "justice"), 2),
  stringsAsFactors = FALSE
)
# Item mapping created
# Efficient data pivoting using pivot_longer: one row per pID × TIME × ITEM
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(item_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("obey", "trad", "opinion", "performance", "justice")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================
# Overall descriptive statistics by TIME and ITEM (per-cell summary table)
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)
# Descriptive statistics by between-subjects factors (adds TEMPORAL_DO split)
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================
# Remove missing values for assumption testing
# NOTE: long_data was already filtered with !is.na(MEAN_DIFFERENCE) above, so
# this is a defensive re-filter; it should not drop additional rows.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))
# 1. Missing values check (per TIME × ITEM cell, counts and percentage)
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )
print("Missing values by TIME and ITEM:")
print(missing_summary)
# 2. Outlier detection using Tukey's 1.5 × IQR fences per cell
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )
print("Outlier summary (IQR method):")
print(outlier_summary)
# 3. Anderson-Darling normality test (streamlined)
# NOTE: ad.test() is evaluated twice per cell (statistic and p.value);
# harmless but redundant.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )
print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each ITEM
# NOTE(review): leveneTest() is called with a formula but no `data` argument
# from inside summarise(); this relies on the formula picking up the data-mask
# variables — verify it evaluates against the grouped data as intended.
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)
# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================
# Function to calculate Hartley's F-max ratio
# Hartley's F-max statistic: ratio of the largest to the smallest variance.
# NAs in `variances` are ignored; a value near 1 suggests homogeneity.
calculate_hartley_ratio <- function(variances) {
  bounds <- range(variances, na.rm = TRUE)
  bounds[2] / bounds[1]
}
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × ITEM)
# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))
print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")
# NOTE(review): the two TEMPORAL_DO level strings "01PAST"/"02FUT" are
# hard-coded here — the printed table above exists to confirm they match.
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio (always >= 1 by construction)
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# More efficient bootstrap function for Hartley's F-max test
# Bootstrap critical value for Hartley's F-max homogeneity-of-variance test.
#
# Args:
#   data:         data.frame (or tibble, e.g. from dplyr::pick()) holding the
#                 grouping and response columns.
#   group_var:    name of the grouping column (character scalar).
#   response_var: name of the numeric response column (character scalar).
#   n_iter:       number of bootstrap iterations (default 1000).
#
# Returns a list with:
#   observed_ratio     - observed max/min variance ratio across groups
#   critical_95        - bootstrap 95th-percentile critical value for the ratio
#   n_valid_iterations - number of finite, non-NA bootstrap ratios retained
#
# Raises: stop()s if no valid bootstrap ratio could be generated.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique groups; order is irrelevant since only max/min of variances are used
  groups <- unique(data[[group_var]])
  # Observed per-group variances (base-R tapply; no dplyr needed inside here)
  observed_vars <- as.numeric(tapply(data[[response_var]], data[[group_var]],
                                     var, na.rm = TRUE))
  # Guard against zero/NA variances so the observed ratio stays finite
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }
  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)
  # Extract each group's responses ONCE as a plain numeric vector.
  # BUGFIX: the previous `data[data[[group_var]] == g, response_var]` returned
  # a one-column tibble (not a vector) when `data` came from dplyr::pick(),
  # so sample()/var() operated on the wrong type. Indexing the column first
  # always yields a vector for both data.frames and tibbles.
  group_data_list <- lapply(groups, function(g) {
    vals <- data[[response_var]][data[[group_var]] == g]
    vals[!is.na(vals)]
  })
  # Bootstrap: resample each group independently (with replacement) and
  # recompute the max/min variance ratio
  for(i in seq_len(n_iter)) {
    sample_vars <- vapply(group_data_list, function(vals) {
      var(sample(vals, size = length(vals), replace = TRUE), na.rm = TRUE)
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Remove invalid ratios (e.g. a resample with zero variance gives Inf)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value = 95th percentile of the bootstrap ratio distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination.
# pick() hands each group's TEMPORAL_DO/MEAN_DIFFERENCE columns to the
# bootstrap helper (requires dplyr >= 1.1.0).
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    # Unpack the list-column into plain numeric columns
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Flag cells where the observed ratio exceeds the bootstrap critical value
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================
# Check for missing data patterns
# NOTE(review): these two table()/xtabs() results are not print()ed or
# assigned, so under source() they produce no visible output — confirm whether
# they were meant to be printed.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
# Check data balance
xtabs(~ pID + TIME + ITEM, data = long_data_clean)
# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))
# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))
# Check if design is balanced (all cell counts 1 => fully balanced)
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
print(summary(as.vector(design_balance)))
# Check for any participants with missing combinations
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )
print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels)
# NOTE(review): an earlier comment listed ITEM levels as read/music/tv/nap/travel,
# but this file's items are obey/trad/opinion/performance/justice — stale comment.
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
                         data = long_data_clean)
print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))
# Alternative: Using afex::aov_ez for cleaner output (optional)
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
                           dv = "MEAN_DIFFERENCE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "ITEM"))
print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Sphericity tests using ezANOVA (library already loaded).
# NOTE(review): these ezANOVA calls omit between = TEMPORAL_DO, so Mauchly's
# test is computed on a within-subjects-only model — confirm this is intended.
print("\n=== SPHERICITY TESTS ===")
# Test sphericity for ITEM (5 levels - within-subjects)
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA (numeric subject id column).
  # NOTE: temp_data is assigned in the calling environment and reused by the
  # later tryCatch blocks; if this block errors first, they fail too.
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))
  # Run ezANOVA to get sphericity tests
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)
  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)
}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})
# Test sphericity for TIME (2 levels - within-subjects)
# NOTE: with 2 levels sphericity is trivially satisfied; ezANOVA usually
# returns no Mauchly table for this factor.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)
  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})
# Test sphericity for TIME × ITEM interaction
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)
  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================
print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")
# Get corrected results from ezANOVA.
# NOTE(review): between = TEMPORAL_DO is not passed here either, so only
# within-subjects effects appear in the corrected table — verify intent.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)
print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)
# Show epsilon values for sphericity corrections
# NOTE: this prints the Mauchly table; the epsilon values themselves live in
# ez_corrected$`Sphericity Corrections` (handled below).
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)
# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
print(ez_corrected$`Sphericity Corrections`)
# Extract and display corrected degrees of freedom
cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
# Get the sphericity corrections
sphericity_corr <- ez_corrected$`Sphericity Corrections`
# Extract original degrees of freedom from ANOVA table
anova_table <- ez_corrected$ANOVA
# Calculate corrected degrees of freedom
corrected_df <- data.frame(
Effect = sphericity_corr$Effect,
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
GG_epsilon = sphericity_corr$GGe,
HF_epsilon = sphericity_corr$HFe
)
print(corrected_df)
# Also show the corrected F-values and p-values with degrees of freedom
cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
for(i in 1:nrow(corrected_df)) {
effect <- corrected_df$Effect[i]
f_value <- anova_table$F[match(effect, anova_table$Effect)]
cat(sprintf("\n%s:\n", effect))
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
}
} else {
print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================
print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")
# Reshape to one row per subject (wide format) so epsilons can be computed
# directly from the repeated-measures matrix.
tryCatch({
  # Convert to wide format: one column per TIME_ITEM cell
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")
  # Expected TIME_ITEM column names.
  # NOTE(review): only 4 ITEM names appear here although ITEM was described
  # above as having 5 levels -- confirm the intended set of columns.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")
  # Check if all columns exist before indexing
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Create the repeated measures design matrix (subjects × conditions)
    rm_design <- as.matrix(wide_data[, within_vars])
    # Calculate epsilon values
    # NOTE(review): epsilon() does not appear to be a car export -- verify
    # which loaded package actually provides this function.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")
    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))
    # Interpretation: conventional 0.75 cutoff for choosing GG vs HF
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }
    # =============================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =============================================================================
    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")
    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")
    # ITEM main effect.
    # NOTE(review): dfs (4, 4244) are hard-coded here; confirm they match
    # the fitted model's actual degrees of freedom.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf
    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # TIME × ITEM interaction (same hard-coded DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf
    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))
    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }
}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================
# Effect size calculations using eta_squared() (loaded earlier in the
# script) on the aov model, plus the "ges" column ezANOVA already reports.
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
# Generalized eta squared from the aov() model fitted earlier
# (mixed_anova_model); wrapped in tryCatch so a failure does not stop the run.
print("Effect Sizes from aov() model:")
tryCatch({
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  print(round(aov_effects, 5))
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})
# Generalized eta squared reported by ezANOVA: the "ges" column of the
# ANOVA table computed above.
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  ez_effects <- ez_corrected$ANOVA
  # ezANOVA already provides generalized eta squared in the "ges" column,
  # so no conversion is needed (a redundant self-assignment was removed here).
  print("Generalized Eta Squared from ezANOVA:")
  print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})
# Keep the aov() summary around for later reference/reporting.
anova_summary <- summary(mixed_anova_model)
# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================
# Bonferroni-adjusted pairwise comparisons on the aov model fitted earlier
# (mixed_anova_model), via emmeans.
print("\n=== POST-HOC COMPARISONS ===")
# Main effect of TIME: estimated marginal means and all pairwise contrasts
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)
# Main effect of ITEM: same procedure across the ITEM levels
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================
# TIME × ITEM Interaction: cell means, then simple effects in each direction
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)
# Simple effects of ITEM within each TIME level (contrasts grouped by TIME)
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)
# Simple effects of TIME within each ITEM level (contrasts grouped by ITEM)
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================
# Cohen's d for significant simple effects, computed from the raw data with
# effsize::cohen.d via the helper function defined next.
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
# Function to calculate Cohen's d for pairwise comparisons
# Report Cohen's d for every significant contrast in an emmeans pairs table.
#
# pairs_df:     data.frame from as.data.frame(pairs(...)); must have columns
#               "contrast" (e.g. "A - B") and "p.value".
# data:         long-format raw data used to look up the observations.
# group1_var:   column of `data` whose levels appear in the contrast string.
# group2_var:   conditioning column; when it is also a column of `pairs_df`,
#               observations are restricted to that row's level.
# response_var: name of the numeric outcome column in `data`.
#
# Side effects only: prints the significant rows and, for each, sample sizes,
# Cohen's d (effsize::cohen.d), its magnitude label, and the p-value.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  sig_rows <- pairs_df[pairs_df$p.value < 0.05, ]
  # Nothing significant: report and bail out early.
  if(nrow(sig_rows) == 0) {
    cat("No significant pairwise comparisons found.\n")
    return(invisible(NULL))
  }
  cat("Significant pairwise comparisons (p < 0.05):\n")
  print(sig_rows)
  cat("\nCohen's d calculated from raw data:\n")
  # Whether the conditioning variable travels with each contrast row
  # (loop-invariant, so computed once).
  by_group2 <- group2_var %in% colnames(sig_rows)
  for(idx in seq_len(nrow(sig_rows))) {
    row_i <- sig_rows[idx, ]
    label <- as.character(row_i$contrast)
    # Split "level1 - level2" into its two sides; skip malformed contrasts.
    sides <- strsplit(label, " - ")[[1]]
    if(length(sides) != 2) next
    lhs <- trimws(sides[1])
    rhs <- trimws(sides[2])
    # Build logical masks selecting the raw observations for each side,
    # optionally restricted to this row's group2 level.
    if(by_group2) {
      g2_level <- as.character(row_i[[group2_var]])
      keep_a <- data[[group1_var]] == lhs & data[[group2_var]] == g2_level
      keep_b <- data[[group1_var]] == rhs & data[[group2_var]] == g2_level
    } else {
      keep_a <- data[[group1_var]] == lhs
      keep_b <- data[[group1_var]] == rhs
    }
    vals_a <- data[[response_var]][keep_a]
    vals_b <- data[[response_var]][keep_b]
    if(length(vals_a) == 0 || length(vals_b) == 0) next
    # Cohen's d via the effsize package
    d_res <- cohen.d(vals_a, vals_b)
    cat(sprintf("Comparison: %s", label))
    if(by_group2) {
      cat(sprintf(" | %s", g2_level))
    }
    cat(sprintf("\n n1 = %d, n2 = %d\n", length(vals_a), length(vals_b)))
    cat(sprintf(" Cohen's d: %.5f\n", d_res$estimate))
    cat(sprintf(" Effect size interpretation: %s\n", d_res$magnitude))
    cat(sprintf(" p-value: %.5f\n", row_i$p.value))
    cat("\n")
  }
}
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): the header says "DOMAIN" but the factor passed below is ITEM
# -- confirm which label is intended.
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
# Simple effects of TIME within each ITEM (pairs computed above)
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")
# Simple effects of ITEM within each TIME (pairs computed above)
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")

BIN
eohi1/normality_plots.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,10 @@
"","NPast_mean_total","NFut_mean_total","DGEN_past_mean","DGEN_fut_mean","domain_mean","DGEN_mean","AOT_total","CRT_correct","CRT_int"
"NPast_mean_total",1,0.58407,0.37645,0.19034,0.91767,0.33077,0.04531,-0.00144,-0.01053
"NFut_mean_total",0.58407,1,0.31658,0.32376,0.85851,0.37059,-0.04928,-0.074,0.05859
"DGEN_past_mean",0.37645,0.31658,1,0.77074,0.3928,0.92613,-0.20675,-0.04166,0.0038
"DGEN_fut_mean",0.19034,0.32376,0.77074,1,0.27874,0.92466,-0.28285,-0.0818,0.03967
"domain_mean",0.91767,0.85851,0.3928,0.27874,1,0.39038,0.0045,-0.03713,0.02203
"DGEN_mean",0.33077,0.37059,0.92613,0.92466,0.39038,1,-0.23213,-0.05422,0.01483
"AOT_total",0.04531,-0.04928,-0.20675,-0.28285,0.0045,-0.23213,1,0.24279,-0.20318
"CRT_correct",-0.00144,-0.074,-0.04166,-0.0818,-0.03713,-0.05422,0.24279,1,-0.89661
"CRT_int",-0.01053,0.05859,0.0038,0.03967,0.02203,0.01483,-0.20318,-0.89661,1
1 NPast_mean_total NFut_mean_total DGEN_past_mean DGEN_fut_mean domain_mean DGEN_mean AOT_total CRT_correct CRT_int
1 NPast_mean_total NFut_mean_total DGEN_past_mean DGEN_fut_mean domain_mean DGEN_mean AOT_total CRT_correct CRT_int
2 NPast_mean_total 1 0.58407 0.37645 0.19034 0.91767 0.33077 0.04531 -0.00144 -0.01053
3 NFut_mean_total 0.58407 1 0.31658 0.32376 0.85851 0.37059 -0.04928 -0.074 0.05859
4 DGEN_past_mean 0.37645 0.31658 1 0.77074 0.3928 0.92613 -0.20675 -0.04166 0.0038
5 DGEN_fut_mean 0.19034 0.32376 0.77074 1 0.27874 0.92466 -0.28285 -0.0818 0.03967
6 domain_mean 0.91767 0.85851 0.3928 0.27874 1 0.39038 0.0045 -0.03713 0.02203
7 DGEN_mean 0.33077 0.37059 0.92613 0.92466 0.39038 1 -0.23213 -0.05422 0.01483
8 AOT_total 0.04531 -0.04928 -0.20675 -0.28285 0.0045 -0.23213 1 0.24279 -0.20318
9 CRT_correct -0.00144 -0.074 -0.04166 -0.0818 -0.03713 -0.05422 0.24279 1 -0.89661
10 CRT_int -0.01053 0.05859 0.0038 0.03967 0.02203 0.01483 -0.20318 -0.89661 1

View File

@ -0,0 +1,232 @@
# --- Setup: education-level regression script (EOHI-DGEN / EHI domain) ---
options(scipen = 999)  # suppress scientific notation in printed output
library(dplyr)
library(car)
library(lmtest)        # (duplicate library(lmtest) call removed)
library(stargazer)
library(sandwich)
# NOTE: working directory and input file are machine-specific
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep only the two outcome means and the education predictor
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>%
  mutate(demo_edu = as.factor(demo_edu))
# examine data object
str(data)
colSums(is.na(data))
sapply(data, class)
levels(data$demo_edu)
# Re-order education levels explicitly so that "High School (or equivalent)"
# is first and therefore serves as the reference category below
data$demo_edu <- factor(data$demo_edu, levels = c(
  "High School (or equivalent)",
  "Trade School (non-military)",
  "College Diploma/Certificate",
  "University - Undergraduate",
  "University - Graduate (Masters)",
  "University - PhD",
  "Professional Degree (ex. JD/MD)"
))
levels(data$demo_edu)
# Create dummy variables: one indicator column per education level
# (~ demo_edu - 1 keeps all 7 levels; the reference is dropped manually below)
dummy_vars <- model.matrix(~ demo_edu - 1, data = data)
dummy_df <- as.data.frame(dummy_vars)
# Rename columns with meaningful names; column order follows the factor
# levels set above (High School first), so edu_highschool is the reference
colnames(dummy_df) <- c(
  "edu_highschool", # reference level (will be dropped)
  "edu_trade",
  "edu_college",
  "edu_uni_undergrad",
  "edu_uni_masters",
  "edu_uni_phd",
  "edu_prof"
)
# Add the dummies to the analysis data, then drop the reference column
data <- cbind(data, dummy_df)
data <- data %>% select(-starts_with("edu_highschool"))
#### MODEL 1 - DGEN ####
# OLS regression of EOHI-DGEN on the six education dummies
# (reference category: High School or equivalent)
model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad +
                   edu_uni_masters + edu_uni_phd + edu_prof, data = data)
# Model 1 diagnostics (2x2 grid of base-R diagnostic plots)
par(mfrow = c(2, 2))
plot(model_DGEN, which = 1) # Residuals vs Fitted
plot(model_DGEN, which = 2) # Normal Q-Q, normality
hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals")
shapiro.test(residuals(model_DGEN))
plot(model_DGEN, which = 3) # Scale-Location
plot(model_DGEN, which = 4) # Cook's Distance
# Model 1 specific tests
vif(model_DGEN) # Multicollinearity
dwtest(model_DGEN) # Independence
outlierTest(model_DGEN) # Outliers
# Look at the specific influential cases
# NOTE(review): only 3 of the 6 outlier rows listed in the comment below are
# inspected here -- confirm whether the remaining rows were checked elsewhere.
data[c(670, 388, 760), ]
# 6 outliers: 670, 388, 760, 258, 873, 1030; acknowledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable.
# heteroscedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity.
# normality violated but sample size is robust to violation
# no multicollinearity
# no autocorrelation (samples are independent)
# results: summary, AIC, and formatted stargazer text table
print(summary(model_DGEN))
print(AIC(model_DGEN))
# Create a nice formatted table
stargazer(model_DGEN, type = "text",
          title = "Regression Results: Education and EOHI-DGEN",
          dep.var.labels = "EOHI-DGEN Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp",
          add.lines = list(c("AIC", round(AIC(model_DGEN), 2))))
# Use robust standard errors (doesn't change coefficients, just SEs):
# HC3 heteroscedasticity-consistent covariance from the sandwich package
modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3"))
stargazer(modelDGEN_robust, type = "text",
          title = "Regression Results: Education and EOHI-DGEN",
          dep.var.labels = "EOHI-DGEN Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp")
#### MODEL 2 - DOMAIN ####
# OLS regression of the EHI domain mean on the same education dummies
model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad +
                     edu_uni_masters + edu_uni_phd + edu_prof, data = data)
# ASSUMPTION CHECKING FOR MODEL 2 (model_domain)
plot(model_domain, which = 1) # Residuals vs Fitted
plot(model_domain, which = 2) # Normal Q-Q, normality
hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals")
shapiro.test(residuals(model_domain))
plot(model_domain, which = 3) # Scale-Location
plot(model_domain, which = 4) # Cook's Distance
# Model 2 specific tests
vif(model_domain) # Multicollinearity
dwtest(model_domain) # Independence
outlierTest(model_domain) # Outliers
# Check if the autocorrelation is real or artifactual:
# Plot residuals against observation order (trend would suggest ordering effects)
plot(residuals(model_domain), type = "l")
abline(h = 0, col = "red")
# 6 outliers: acknowledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable.
# heteroscedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity.
# normality violated but sample size is robust to violation
# no multicollinearity
# autocorrelation is significant, may be due to aggregated measure of multiple repeated measures
# Reset plotting to 1x1
# par(mfrow = c(1, 1))
print(summary(model_domain))
print(AIC(model_domain))
stargazer(model_domain, type = "text",
          title = "Regression Results: Education and EHI Domain",
          dep.var.labels = "EHI Domain Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp", # This shows coefficients, SEs, and p-values
          add.lines = list(c("AIC", round(AIC(model_domain), 2))))
# Use robust standard errors (doesn't change coefficients, just SEs)
modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3"))
stargazer(modelDOMAIN_robust, type = "text",
          title = "Regression Results: Education and EHI Domain",
          dep.var.labels = "EHI Domain Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp")
#### PLOTS ####
# Dot-and-error-bar plots of group means by education level, with
# normal-approximation 95% CIs (mean ± 1.96 * SE).
library(ggplot2)
library(dplyr)
# Calculate means and confidence intervals for EOHI-DGEN
edu_summary_DGEN <- data %>%
  group_by(demo_edu) %>%
  summarise(
    mean_DGEN = mean(eohiDGEN_mean, na.rm = TRUE),
    n = n(),
    se_DGEN = sd(eohiDGEN_mean, na.rm = TRUE) / sqrt(n()),
    ci_lower_DGEN = mean_DGEN - 1.96 * se_DGEN,
    ci_upper_DGEN = mean_DGEN + 1.96 * se_DGEN
  )
# Calculate means and confidence intervals for EHI Domain
edu_summary_domain <- data %>%
  group_by(demo_edu) %>%
  summarise(
    mean_domain = mean(ehi_global_mean, na.rm = TRUE),
    n = n(),
    se_domain = sd(ehi_global_mean, na.rm = TRUE) / sqrt(n()),
    ci_lower_domain = mean_domain - 1.96 * se_domain,
    ci_upper_domain = mean_domain + 1.96 * se_domain
  )
# Plot 1: EOHI-DGEN means with confidence intervals
p1 <- ggplot(edu_summary_DGEN, aes(x = demo_edu, y = mean_DGEN)) +
  geom_point(size = 3, color = "steelblue") +
  geom_errorbar(aes(ymin = ci_lower_DGEN, ymax = ci_upper_DGEN),
                width = 0.1, color = "steelblue", linewidth = 1) +
  labs(
    title = "Mean EOHI-DGEN by Education Level",
    subtitle = "Error bars show 95% confidence intervals",
    x = "Education Level",
    y = "Mean EOHI-DGEN Score"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
# Plot 2: EHI Domain means with confidence intervals (same layout)
p2 <- ggplot(edu_summary_domain, aes(x = demo_edu, y = mean_domain)) +
  geom_point(size = 3, color = "darkgreen") +
  geom_errorbar(aes(ymin = ci_lower_domain, ymax = ci_upper_domain),
                width = 0.1, color = "darkgreen", linewidth = 1) +
  labs(
    title = "Mean EHI Domain by Education Level",
    subtitle = "Error bars show 95% confidence intervals",
    x = "Education Level",
    y = "Mean EHI Domain Score"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
# Display the plots
print(p1)
print(p2)
# Save the plots (300 dpi PNGs in the working directory)
ggsave("education_DGEN_means.png", p1, width = 10, height = 6, dpi = 300)
ggsave("education_domain_means.png", p2, width = 10, height = 6, dpi = 300)

View File

@ -0,0 +1,320 @@
# --- Setup: age/sex regression script (EOHI-DGEN / EHI domain) ---
options(scipen = 999)  # suppress scientific notation in printed output
library(dplyr)
library(car)
library(lmtest)        # (duplicate library(lmtest) call removed)
library(stargazer)
library(sandwich)
# NOTE: working directory and input file are machine-specific
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the two outcomes plus sex and age; drop "Prefer not to say" so that
# the sex dummy below is strictly binary
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>%
  filter(demo_sex != "Prefer not to say")
str(data)
colSums(is.na(data))
sapply(data, class)
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean) so the intercept refers to mean age
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering (mean should be ~0)
print(summary(data$age_centered))
# Descriptives for sex (frequency table and proportions)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
#### REGRESSION MODELS ####
# For each model: fit, 2x2 base-R diagnostic plots, Shapiro-Wilk test on the
# residuals, coefficient summary, and AIC.
# MODEL 1: Age only - EOHI
age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data)
par(mfrow = c(2, 2))
plot(age_DGEN)
print(shapiro.test(residuals(age_DGEN)))
print(summary(age_DGEN))
print(AIC(age_DGEN))
# MODEL 1: Age only - EHI
age_domain <- lm(ehi_global_mean ~ age_centered, data = data)
par(mfrow = c(2, 2))
plot(age_domain)
print(shapiro.test(residuals(age_domain)))
print(summary(age_domain))
print(AIC(age_domain))
# MODEL 2: Sex only - EOHI
sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(sex_DGEN)
print(shapiro.test(residuals(sex_DGEN)))
print(summary(sex_DGEN))
print(AIC(sex_DGEN))
# Diagnostic-plot interpretation notes for the sex_DGEN model:
# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met.
# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust.
# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n.
# p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage.
# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed.
# MODEL 2: Sex only - EHI
sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(sex_domain)
print(shapiro.test(residuals(sex_domain)))
print(summary(sex_domain))
print(AIC(sex_domain))
# MODEL 3: Age + Sex + Interaction - EOHI
# (explicit age × sex product term; VIFs are stored for later reporting)
interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(interaction_DGEN)
print(shapiro.test(residuals(interaction_DGEN)))
vif_DGEN <- vif(interaction_DGEN)
print(vif_DGEN)
print(summary(interaction_DGEN))
print(AIC(interaction_DGEN))
# MODEL 3: Age + Sex + Interaction - EHI (same specification, EHI outcome)
interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(interaction_domain)
print(shapiro.test(residuals(interaction_domain)))
vif_domain <- vif(interaction_domain)
print(vif_domain)
print(summary(interaction_domain))
print(AIC(interaction_domain))
#### troubleshooting ####
# Clear any existing plots
# dev.off()
#### PLOTS ####
# Scatterplots with fitted OLS lines for the two age-only models
library(ggplot2)
# Figure 1: age_DGEN model
# NOTE(review): the subtitle hard-codes "p < 0.001"; confirm this matches
# the model output rather than deriving it from summary(age_DGEN).
p1 <- ggplot(data, aes(x = age_centered, y = eohiDGEN_mean)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Age and EOHI-DGEN Relationship",
    x = "Age (centered)",
    y = "EOHI-DGEN Mean",
    subtitle = paste("R² =", round(summary(age_DGEN)$r.squared, 3),
                     ", p < 0.001")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
# Figure 2: age_domain model (same layout, different outcome)
p2 <- ggplot(data, aes(x = age_centered, y = ehi_global_mean)) +
  geom_point(alpha = 0.6, color = "darkgreen") +
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Age and EHI Domain Relationship",
    x = "Age (centered)",
    y = "EHI Domain Mean",
    subtitle = paste("R² =", round(summary(age_domain)$r.squared, 3),
                     ", p < 0.001")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
# Save the plots (300 dpi PNGs in the working directory)
ggsave("age_DGEN_plot.png", p1, width = 8, height = 6, dpi = 300)
ggsave("age_domain_plot.png", p2, width = 8, height = 6, dpi = 300)
# Display the plots
print(p1)
print(p2)
#### HTML file ####
# Build a single HTML report grouped by model: per outcome, each model's
# stargazer table (with AIC and max VIF lines), its assumption plots, and --
# for the age models -- the fitted-line visualization.
# BUG FIX: the original referenced max_vif_DGEN / max_vif_domain, which were
# never defined anywhere in the script; the maxima are now computed directly
# from vif_DGEN / vif_domain (calculated for the interaction models above).
# NOTE(review): the *_assumptions.png images referenced below are not saved
# by this script -- confirm they are produced elsewhere.
library(htmltools)
# Start HTML document
html_content <- htmltools::div(
  htmltools::h1("Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain"),
  # EOHI-DGEN Models Section
  htmltools::h2("EOHI-DGEN Models"),
  # Age Model
  htmltools::h3("1. Age Model (age_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        age_DGEN,
        type = "html",
        title = "Age Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Age (centered)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(age_DGEN), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "age_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;"),
    htmltools::h4("Model Visualization"),
    htmltools::img(src = "age_DGEN_plot.png", style = "width: 100%; max-width: 800px;")
  ),
  # Sex Model
  htmltools::h3("2. Sex Model (sex_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        sex_DGEN,
        type = "html",
        title = "Sex Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Sex (dummy)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(sex_DGEN), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "sex_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),
  # Interaction Model
  htmltools::h3("3. Interaction Model (interaction_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        interaction_DGEN,
        type = "html",
        title = "Interaction Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(interaction_DGEN), 2)),
          # FIX: was the undefined max_vif_DGEN
          c("Max VIF", round(max(vif_DGEN), 2))
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "interaction_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),
  # EHI Domain Models Section
  htmltools::h2("EHI Domain Models"),
  # Age Model
  htmltools::h3("1. Age Model (age_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        age_domain,
        type = "html",
        title = "Age Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Age (centered)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(age_domain), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "age_domain_assumptions.png", style = "width: 100%; max-width: 800px;"),
    htmltools::h4("Model Visualization"),
    htmltools::img(src = "age_domain_plot.png", style = "width: 100%; max-width: 800px;")
  ),
  # Sex Model
  htmltools::h3("2. Sex Model (sex_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        sex_domain,
        type = "html",
        title = "Sex Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Sex (dummy)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(sex_domain), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "sex_domain_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),
  # Interaction Model
  htmltools::h3("3. Interaction Model (interaction_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        interaction_domain,
        type = "html",
        title = "Interaction Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(interaction_domain), 2)),
          # FIX: was the undefined max_vif_domain
          c("Max VIF", round(max(vif_domain), 2))
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "interaction_domain_assumptions.png", style = "width: 100%; max-width: 800px;")
  )
)
# Save HTML content to the working directory
htmltools::save_html(html_content, "regression_analysis_report.html")
print("HTML report created: regression_analysis_report.html")

View File

@ -0,0 +1,31 @@
<table style="text-align:center"><caption><strong>Regression Models: EOHI-DGEN</strong></caption>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="3">EOHI-DGEN Mean</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td><td></td><td>-0.018</td></tr>
<tr><td style="text-align:left"></td><td>(0.003)</td><td></td><td>(0.005)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td><td></td><td>p = 0.0004<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.099</td><td>0.103</td></tr>
<tr><td style="text-align:left"></td><td></td><td>(0.107)</td><td>(0.106)</td></tr>
<tr><td style="text-align:left"></td><td></td><td>p = 0.355</td><td>p = 0.331</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>0.006</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.402</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Constant</td><td>0.417</td><td>0.367</td><td>0.364</td></tr>
<tr><td style="text-align:left"></td><td>(0.053)</td><td>(0.076)</td><td>(0.075)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td><td>4192.48</td><td>4176.42</td></tr>
<tr><td style="text-align:left">Max VIF</td><td>N/A</td><td>N/A</td><td>2.22</td></tr>
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td><td>0.001</td><td>0.020</td></tr>
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td><td>-0.0001</td><td>0.017</td></tr>
<tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td><td>1.742 (df = 1059)</td><td>1.727 (df = 1057)</td></tr>
<tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td><td>0.859 (df = 1; 1059)</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
</table>

View File

@ -0,0 +1,31 @@
<table style="text-align:center"><caption><strong>Regression Models: EHI Domain</strong></caption>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="3">EHI Domain Mean</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.007</td><td></td><td>-0.006</td></tr>
<tr><td style="text-align:left"></td><td>(0.001)</td><td></td><td>(0.001)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td></td><td>p = 0.00001<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.031</td><td>0.033</td></tr>
<tr><td style="text-align:left"></td><td></td><td>(0.031)</td><td>(0.030)</td></tr>
<tr><td style="text-align:left"></td><td></td><td>p = 0.311</td><td>p = 0.274</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>-0.0003</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.002)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.883</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Constant</td><td>0.137</td><td>0.121</td><td>0.120</td></tr>
<tr><td style="text-align:left"></td><td>(0.015)</td><td>(0.022)</td><td>(0.021)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00000<sup>***</sup></td><td>p = 0.00000<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1515.75</td><td>1562.88</td><td>1518.52</td></tr>
<tr><td style="text-align:left">Max VIF</td><td>NA</td><td>NA</td><td>2.22</td></tr>
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.044</td><td>0.001</td><td>0.045</td></tr>
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td><td>0.00003</td><td>0.043</td></tr>
<tr><td style="text-align:left">Residual Std. Error</td><td>0.493 (df = 1059)</td><td>0.504 (df = 1059)</td><td>0.494 (df = 1057)</td></tr>
<tr><td style="text-align:left">F Statistic</td><td>49.178<sup>***</sup> (df = 1; 1059)</td><td>1.029 (df = 1; 1059)</td><td>16.789<sup>***</sup> (df = 3; 1057)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
</table>

View File

@ -0,0 +1,31 @@
<table style="text-align:center"><caption><strong>Regression Models: EOHI-DGEN</strong></caption>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
<tr><td style="text-align:left"></td><td colspan="3">EOHI-DGEN Mean</td></tr>
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td><td></td><td>-0.018</td></tr>
<tr><td style="text-align:left"></td><td>(0.003)</td><td></td><td>(0.005)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td><td></td><td>p = 0.0004<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.099</td><td>0.103</td></tr>
<tr><td style="text-align:left"></td><td></td><td>(0.107)</td><td>(0.106)</td></tr>
<tr><td style="text-align:left"></td><td></td><td>p = 0.355</td><td>p = 0.331</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>0.006</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.402</td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td style="text-align:left">Constant</td><td>0.417</td><td>0.367</td><td>0.364</td></tr>
<tr><td style="text-align:left"></td><td>(0.053)</td><td>(0.076)</td><td>(0.075)</td></tr>
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td></tr>
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td><td>4192.48</td><td>4176.42</td></tr>
<tr><td style="text-align:left">Max VIF</td><td>N/A</td><td>N/A</td><td>2.22</td></tr>
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td><td>0.001</td><td>0.020</td></tr>
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td><td>-0.0001</td><td>0.017</td></tr>
<tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td><td>1.742 (df = 1059)</td><td>1.727 (df = 1057)</td></tr>
<tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td><td>0.859 (df = 1; 1059)</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr>
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
</table>

View File

@ -0,0 +1,62 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<style>body{background-color:white;}</style>
</head>
<body>
<div>
<h1>Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain</h1>
<h2>EOHI-DGEN Models</h2>
<h3>1. Age Model (age_DGEN)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Age Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td></tr> <tr><td style="text-align:left"></td><td>(0.003)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.417</td></tr> <tr><td style="text-align:left"></td><td>(0.053)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="age_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
<h4>Model Visualization</h4>
<img src="age_DGEN_plot.png" style="width: 100%; max-width: 800px;"/>
</div>
<h3>2. Sex Model (sex_DGEN)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Sex Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Sex (dummy)</td><td>0.099</td></tr> <tr><td style="text-align:left"></td><td>(0.107)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.355</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.367</td></tr> <tr><td style="text-align:left"></td><td>(0.076)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4192.48</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.001</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>-0.0001</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>1.742 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>0.859 (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="sex_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
</div>
<h3>3. Interaction Model (interaction_DGEN)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Interaction Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.018</td></tr> <tr><td style="text-align:left"></td><td>(0.005)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.0004<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Sex (dummy)</td><td>0.103</td></tr> <tr><td style="text-align:left"></td><td>(0.106)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.331</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Age x Sex</td><td>0.006</td></tr> <tr><td style="text-align:left"></td><td>(0.007)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.402</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.364</td></tr> <tr><td style="text-align:left"></td><td>(0.075)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4176.42</td></tr> <tr><td style="text-align:left">Max VIF</td><td>2.22</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.020</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td></tr> <tr><td style="text-align:left">Residual Std. 
Error</td><td>1.727 (df = 1057)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="interaction_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
</div>
<h2>EHI Domain Models</h2>
<h3>1. Age Model (age_domain)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Age Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.007</td></tr> <tr><td style="text-align:left"></td><td>(0.001)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.137</td></tr> <tr><td style="text-align:left"></td><td>(0.015)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1515.75</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.044</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>0.493 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>49.178<sup>***</sup> (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="age_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
<h4>Model Visualization</h4>
<img src="age_domain_plot.png" style="width: 100%; max-width: 800px;"/>
</div>
<h3>2. Sex Model (sex_domain)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Sex Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Sex (dummy)</td><td>0.031</td></tr> <tr><td style="text-align:left"></td><td>(0.031)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.311</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.121</td></tr> <tr><td style="text-align:left"></td><td>(0.022)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1562.88</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.001</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.00003</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>0.504 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>1.029 (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="sex_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
</div>
<h3>3. Interaction Model (interaction_domain)</h3>
<div style="margin-bottom: 30px;">
<h4>Model Results, AIC, and VIF</h4>
<table style="text-align:center"><caption><strong>Interaction Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.006</td></tr> <tr><td style="text-align:left"></td><td>(0.001)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Sex (dummy)</td><td>0.033</td></tr> <tr><td style="text-align:left"></td><td>(0.030)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.274</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Age x Sex</td><td>-0.0003</td></tr> <tr><td style="text-align:left"></td><td>(0.002)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.883</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.120</td></tr> <tr><td style="text-align:left"></td><td>(0.021)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1518.52</td></tr> <tr><td style="text-align:left">Max VIF</td><td>2.22</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.045</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td></tr> <tr><td style="text-align:left">Residual Std. 
Error</td><td>0.494 (df = 1057)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>16.789<sup>***</sup> (df = 3; 1057)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
<h4>Assumption Diagnostic Plots</h4>
<img src="interaction_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
</div>
</div>
</body>
</html>

View File

@ -0,0 +1,204 @@
# Cronbach's Alpha Reliability Analysis
# For 4 domains (preferences, personality, values, life satisfaction) at 2 time points
# 5 items per domain per time point

# Load required libraries
library(psych)
library(dplyr)
library(tidyr)

# Read the data
data <- read.csv("exp1.csv")

# Item names are identical at both time points; only the column-name prefix
# differs ("NPastDiff_" for past ratings, "NFutDiff_" for future ratings),
# so build each scale's variable list from a shared set of item stems.
pref_items <- c("pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel")
pers_items <- c("pers_extravert", "pers_critical", "pers_dependable",
                "pers_anxious", "pers_complex")
val_items  <- c("val_obey", "val_trad", "val_opinion",
                "val_performance", "val_justice")
life_items <- c("life_ideal", "life_excellent", "life_satisfied",
                "life_important", "life_change")

# Past time point scales
past_pref_vars <- paste0("NPastDiff_", pref_items)
past_pers_vars <- paste0("NPastDiff_", pers_items)
past_val_vars  <- paste0("NPastDiff_", val_items)
past_life_vars <- paste0("NPastDiff_", life_items)

# Future time point scales
fut_pref_vars <- paste0("NFutDiff_", pref_items)
fut_pers_vars <- paste0("NFutDiff_", pers_items)
fut_val_vars  <- paste0("NFutDiff_", val_items)
fut_life_vars <- paste0("NFutDiff_", life_items)
# Function to calculate Cronbach's alpha and return detailed results.
#
# Args:
#   data:       data frame containing (at least) the columns named in var_names
#   var_names:  character vector of the scale's item column names
#   scale_name: label printed in the report header
#
# Side effects: cat()/print()s a missing-data summary, the alpha coefficients,
# per-item statistics, and assumption checks to the console.
# Returns: the psych::alpha() result object, or NULL when fewer than 3
# complete cases remain.
calc_cronbach_alpha <- function(data, var_names, scale_name) {
  # BUG FIX: the original used `"="*60`, which is a runtime error in R
  # ("non-numeric argument to binary operator") -- R has no string
  # repetition operator. strrep() builds the intended separator line.
  sep60 <- strrep("=", 60)
  # Check for missing values
  scale_data <- data[, var_names]
  missing_info <- data.frame(
    Variable = var_names,
    Missing_Count = colSums(is.na(scale_data)),
    Missing_Percent = round(colSums(is.na(scale_data)) / nrow(scale_data) * 100, 2)
  )
  # Remove rows with any missing values for reliability analysis
  complete_data <- scale_data[complete.cases(scale_data), ]
  cat("\n", sep60, "\n")
  cat("SCALE:", scale_name, "\n")
  cat(sep60, "\n")
  cat("Sample size for reliability analysis:", nrow(complete_data), "\n")
  cat("Original sample size:", nrow(data), "\n")
  cat("Cases removed due to missing data:", nrow(data) - nrow(complete_data), "\n\n")
  cat("Missing data summary:\n")
  print(missing_info)
  # Reliability cannot be estimated from fewer than 3 complete cases
  if(nrow(complete_data) < 3) {
    cat("\nWARNING: Insufficient complete cases for reliability analysis\n")
    return(NULL)
  }
  # Calculate Cronbach's alpha; check.keys = TRUE lets psych auto-reverse
  # items that correlate negatively with the total (flagged in its output)
  alpha_result <- alpha(complete_data, check.keys = TRUE)
  cat("\nCronbach's Alpha Results:\n")
  cat("Raw alpha:", round(alpha_result$total$raw_alpha, 4), "\n")
  cat("Standardized alpha:", round(alpha_result$total$std.alpha, 4), "\n")
  cat("Average inter-item correlation:", round(alpha_result$total$average_r, 4), "\n")
  cat("Number of items:", alpha_result$total$nvar, "\n")
  # Item statistics: alpha-if-deleted and corrected item-total correlations
  cat("\nItem Statistics:\n")
  item_stats <- data.frame(
    Item = var_names,
    Alpha_if_deleted = round(alpha_result$alpha.drop$raw_alpha, 4),
    Item_total_correlation = round(alpha_result$item.stats$r.drop, 4),
    Mean = round(alpha_result$item.stats$mean, 4),
    SD = round(alpha_result$item.stats$sd, 4)
  )
  print(item_stats)
  # Check assumptions
  cat("\nAssumption Checks:\n")
  # 1. Check for sufficient sample size (minimum 30 recommended)
  sample_size_ok <- nrow(complete_data) >= 30
  cat("Sample size adequate (≥30):", sample_size_ok, "\n")
  # 2. Check for adequate inter-item correlations (should be > 0.30);
  #    keep only the upper triangle so each pair is counted once
  inter_item_cors <- cor(complete_data)
  inter_item_cors[lower.tri(inter_item_cors)] <- NA
  diag(inter_item_cors) <- NA
  inter_item_cors_flat <- as.vector(inter_item_cors)
  inter_item_cors_flat <- inter_item_cors_flat[!is.na(inter_item_cors_flat)]
  adequate_cors <- sum(inter_item_cors_flat > 0.30) / length(inter_item_cors_flat)
  cat("Proportion of inter-item correlations > 0.30:", round(adequate_cors, 4), "\n")
  # 3. Check for negative correlations (concerning for unidimensionality)
  negative_cors <- sum(inter_item_cors_flat < 0, na.rm = TRUE)
  cat("Number of negative inter-item correlations:", negative_cors, "\n")
  # 4. Check item variances (should be roughly similar)
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat("Ratio of highest to lowest item variance:", round(var_ratio, 4), "\n")
  return(alpha_result)
}
# Calculate Cronbach's alpha for all scales
cat(paste0("CRONBACH'S ALPHA RELIABILITY ANALYSIS",
           "\nData: exp1.csv",
           "\nTotal sample size: ", nrow(data)))

# One entry per scale, in report order: result-variable name -> item
# vector + human-readable label. assign() creates the same eight
# *_alpha objects the rest of the script expects.
scale_specs <- list(
  past_pref_alpha = list(vars = past_pref_vars, label = "Past Preferences"),
  past_pers_alpha = list(vars = past_pers_vars, label = "Past Personality"),
  past_val_alpha  = list(vars = past_val_vars,  label = "Past Values"),
  past_life_alpha = list(vars = past_life_vars, label = "Past Life Satisfaction"),
  fut_pref_alpha  = list(vars = fut_pref_vars,  label = "Future Preferences"),
  fut_pers_alpha  = list(vars = fut_pers_vars,  label = "Future Personality"),
  fut_val_alpha   = list(vars = fut_val_vars,   label = "Future Values"),
  fut_life_alpha  = list(vars = fut_life_vars,  label = "Future Life Satisfaction")
)
for (result_name in names(scale_specs)) {
  spec <- scale_specs[[result_name]]
  assign(result_name, calc_cronbach_alpha(data, spec$vars, spec$label))
}
# Summary table
# BUG FIX: the original used `"="*80`, which is a runtime error in R
# (strings cannot be multiplied); strrep() builds the separator line.
sep80 <- strrep("=", 80)
cat("\n", sep80, "\n")
cat("SUMMARY OF CRONBACH'S ALPHA COEFFICIENTS")
cat("\n", sep80, "\n")

# Result objects in a fixed order matching the Scale labels below.
alpha_objects <- list(past_pref_alpha, past_pers_alpha, past_val_alpha, past_life_alpha,
                      fut_pref_alpha, fut_pers_alpha, fut_val_alpha, fut_life_alpha)

# Rounded raw alpha for one result object, or NA when the analysis was skipped.
extract_alpha <- function(res) {
  if (is.null(res)) NA_real_ else round(res$total$raw_alpha, 4)
}

# Qualitative label for one result object, using the conventional
# 0.9/0.8/0.7/0.6 cutoffs (same thresholds as the interpretation guide).
interpret_alpha <- function(res) {
  if (is.null(res)) return("Insufficient data")
  a <- res$total$raw_alpha
  if (a >= 0.90) "Excellent"
  else if (a >= 0.80) "Good"
  else if (a >= 0.70) "Acceptable"
  else if (a >= 0.60) "Questionable"
  else "Poor"
}

summary_results <- data.frame(
  Scale = c("Past Preferences", "Past Personality", "Past Values", "Past Life Satisfaction",
            "Future Preferences", "Future Personality", "Future Values", "Future Life Satisfaction"),
  Alpha = vapply(alpha_objects, extract_alpha, numeric(1)),
  Items = rep(5, 8),
  Interpretation = vapply(alpha_objects, interpret_alpha, character(1))
)
print(summary_results)
# Save results to CSV
write.csv(summary_results, "cronbach_alpha_summary.csv", row.names = FALSE)
cat("\nSummary results saved to: cronbach_alpha_summary.csv\n")
# BUG FIX: the original used `"="*80`, which is a runtime error in R
# (no string repetition operator); strrep() builds the separator line.
rule80 <- strrep("=", 80)
cat("\n", rule80, "\n")
cat("INTERPRETATION GUIDE FOR CRONBACH'S ALPHA")
cat("\n", rule80, "\n")
cat("α ≥ 0.90: Excellent reliability\n")
cat("α ≥ 0.80: Good reliability\n")
cat("α ≥ 0.70: Acceptable reliability\n")
cat("α ≥ 0.60: Questionable reliability\n")
cat("α < 0.60: Poor reliability\n")

BIN
eohi1/residual_plots.pdf Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View File

@ -0,0 +1,14 @@
"","eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean","ehi_pref_mean","ehi_pers_mean","ehi_val_mean","ehi_life_mean","ehi_global_mean","AOT_total","CRT_correct","CRT_int"
"eohiDGEN_pref",1,0.27683,0.23964,0.24515,0.66949,0.22538,0.09704,0.08007,0.12507,0.21415,0.06836,0.05361,-0.05819
"eohiDGEN_pers",0.27683,1,0.31901,0.18576,0.70868,0.21513,0.18684,0.13159,0.12199,0.26738,0.0916,0.02566,-0.01732
"eohiDGEN_val",0.23964,0.31901,1,0.13719,0.68261,0.14816,0.12424,0.23448,0.06907,0.21756,0.10396,0.07479,-0.06044
"eohiDGEN_life",0.24515,0.18576,0.13719,1,0.25499,0.02053,0.06528,0.04652,0.29721,0.23854,0.11795,0.03949,-0.0013
"eohiDGEN_mean",0.66949,0.70868,0.68261,0.25499,1,0.26963,0.17831,0.19688,0.1283,0.31052,0.11102,0.06665,-0.05731
"ehi_pref_mean",0.22538,0.21513,0.14816,0.02053,0.26963,1,0.23765,0.19925,0.0625,0.51305,0.0504,0.06892,-0.06473
"ehi_pers_mean",0.09704,0.18684,0.12424,0.06528,0.17831,0.23765,1,0.22369,0.12488,0.5826,0.03132,0.05133,-0.04906
"ehi_val_mean",0.08007,0.13159,0.23448,0.04652,0.19688,0.19925,0.22369,1,0.02752,0.51294,0.01519,0.00599,-0.03791
"ehi_life_mean",0.12507,0.12199,0.06907,0.29721,0.1283,0.0625,0.12488,0.02752,1,0.59399,0.07295,0.01639,-0.00581
"ehi_global_mean",0.21415,0.26738,0.21756,0.23854,0.31052,0.51305,0.5826,0.51294,0.59399,1,0.0849,0.05133,-0.05746
"AOT_total",0.06836,0.0916,0.10396,0.11795,0.11102,0.0504,0.03132,0.01519,0.07295,0.0849,1,0.2609,-0.20963
"CRT_correct",0.05361,0.02566,0.07479,0.03949,0.06665,0.06892,0.05133,0.00599,0.01639,0.05133,0.2609,1,-0.87087
"CRT_int",-0.05819,-0.01732,-0.06044,-0.0013,-0.05731,-0.06473,-0.04906,-0.03791,-0.00581,-0.05746,-0.20963,-0.87087,1
1 eohiDGEN_pref eohiDGEN_pers eohiDGEN_val eohiDGEN_life eohiDGEN_mean ehi_pref_mean ehi_pers_mean ehi_val_mean ehi_life_mean ehi_global_mean AOT_total CRT_correct CRT_int
1 eohiDGEN_pref eohiDGEN_pers eohiDGEN_val eohiDGEN_life eohiDGEN_mean ehi_pref_mean ehi_pers_mean ehi_val_mean ehi_life_mean ehi_global_mean AOT_total CRT_correct CRT_int
2 eohiDGEN_pref 1 0.27683 0.23964 0.24515 0.66949 0.22538 0.09704 0.08007 0.12507 0.21415 0.06836 0.05361 -0.05819
3 eohiDGEN_pers 0.27683 1 0.31901 0.18576 0.70868 0.21513 0.18684 0.13159 0.12199 0.26738 0.0916 0.02566 -0.01732
4 eohiDGEN_val 0.23964 0.31901 1 0.13719 0.68261 0.14816 0.12424 0.23448 0.06907 0.21756 0.10396 0.07479 -0.06044
5 eohiDGEN_life 0.24515 0.18576 0.13719 1 0.25499 0.02053 0.06528 0.04652 0.29721 0.23854 0.11795 0.03949 -0.0013
6 eohiDGEN_mean 0.66949 0.70868 0.68261 0.25499 1 0.26963 0.17831 0.19688 0.1283 0.31052 0.11102 0.06665 -0.05731
7 ehi_pref_mean 0.22538 0.21513 0.14816 0.02053 0.26963 1 0.23765 0.19925 0.0625 0.51305 0.0504 0.06892 -0.06473
8 ehi_pers_mean 0.09704 0.18684 0.12424 0.06528 0.17831 0.23765 1 0.22369 0.12488 0.5826 0.03132 0.05133 -0.04906
9 ehi_val_mean 0.08007 0.13159 0.23448 0.04652 0.19688 0.19925 0.22369 1 0.02752 0.51294 0.01519 0.00599 -0.03791
10 ehi_life_mean 0.12507 0.12199 0.06907 0.29721 0.1283 0.0625 0.12488 0.02752 1 0.59399 0.07295 0.01639 -0.00581
11 ehi_global_mean 0.21415 0.26738 0.21756 0.23854 0.31052 0.51305 0.5826 0.51294 0.59399 1 0.0849 0.05133 -0.05746
12 AOT_total 0.06836 0.0916 0.10396 0.11795 0.11102 0.0504 0.03132 0.01519 0.07295 0.0849 1 0.2609 -0.20963
13 CRT_correct 0.05361 0.02566 0.07479 0.03949 0.06665 0.06892 0.05133 0.00599 0.01639 0.05133 0.2609 1 -0.87087
14 CRT_int -0.05819 -0.01732 -0.06044 -0.0013 -0.05731 -0.06473 -0.04906 -0.03791 -0.00581 -0.05746 -0.20963 -0.87087 1

145
eohi2/EHI reliability.html Normal file
View File

@ -0,0 +1,145 @@
<html><head><title>EHI Reliability Analysis</title></head><body><h1>EHI Reliability Analysis</h1><h2>Cronbach's Alpha</h2><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> raw_alpha </th>
<th style="text-align:right;"> std.alpha </th>
<th style="text-align:right;"> G6(smc) </th>
<th style="text-align:right;"> average_r </th>
<th style="text-align:right;"> S/N </th>
<th style="text-align:right;"> ase </th>
<th style="text-align:right;"> mean </th>
<th style="text-align:right;"> sd </th>
<th style="text-align:right;"> median_r </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> </td>
<td style="text-align:right;"> 0.5709145 </td>
<td style="text-align:right;"> 0.6860723 </td>
<td style="text-align:right;"> 0.6450534 </td>
<td style="text-align:right;"> 0.3533207 </td>
<td style="text-align:right;"> 2.185446 </td>
<td style="text-align:right;"> 0.0229978 </td>
<td style="text-align:right;"> 0.138301 </td>
<td style="text-align:right;"> 0.8228433 </td>
<td style="text-align:right;"> 0.3517941 </td>
</tr>
</tbody>
</table><h2>Split-Half Reliability</h2><p>Maximum split half reliability: 0.77987</p><h2>Item-Level Statistics</h2><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> n </th>
<th style="text-align:right;"> raw.r </th>
<th style="text-align:right;"> std.r </th>
<th style="text-align:right;"> r.cor </th>
<th style="text-align:right;"> r.drop </th>
<th style="text-align:right;"> mean </th>
<th style="text-align:right;"> sd </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> ehiDGEN_5_mean </td>
<td style="text-align:right;"> 489 </td>
<td style="text-align:right;"> 0.7653498 </td>
<td style="text-align:right;"> 0.6789150 </td>
<td style="text-align:right;"> 0.5035473 </td>
<td style="text-align:right;"> 0.4480613 </td>
<td style="text-align:right;"> 0.0620314 </td>
<td style="text-align:right;"> 1.4573526 </td>
</tr>
<tr>
<td style="text-align:left;"> ehiDGEN_10_mean </td>
<td style="text-align:right;"> 489 </td>
<td style="text-align:right;"> 0.8686359 </td>
<td style="text-align:right;"> 0.7381306 </td>
<td style="text-align:right;"> 0.6097580 </td>
<td style="text-align:right;"> 0.5098844 </td>
<td style="text-align:right;"> 0.3006135 </td>
<td style="text-align:right;"> 1.8924455 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi5_global_mean </td>
<td style="text-align:right;"> 489 </td>
<td style="text-align:right;"> 0.5032176 </td>
<td style="text-align:right;"> 0.7114339 </td>
<td style="text-align:right;"> 0.5651807 </td>
<td style="text-align:right;"> 0.3871421 </td>
<td style="text-align:right;"> 0.0675187 </td>
<td style="text-align:right;"> 0.4620107 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi10_global_mean </td>
<td style="text-align:right;"> 489 </td>
<td style="text-align:right;"> 0.5705196 </td>
<td style="text-align:right;"> 0.7420342 </td>
<td style="text-align:right;"> 0.6269733 </td>
<td style="text-align:right;"> 0.4474763 </td>
<td style="text-align:right;"> 0.1230402 </td>
<td style="text-align:right;"> 0.5252241 </td>
</tr>
</tbody>
</table><h3>Alpha if Item Dropped</h3><table>
<thead>
<tr>
<th style="text-align:left;"> </th>
<th style="text-align:right;"> raw_alpha </th>
<th style="text-align:right;"> std.alpha </th>
<th style="text-align:right;"> G6(smc) </th>
<th style="text-align:right;"> average_r </th>
<th style="text-align:right;"> S/N </th>
<th style="text-align:right;"> alpha se </th>
<th style="text-align:right;"> var.r </th>
<th style="text-align:right;"> med.r </th>
</tr>
</thead>
<tbody>
<tr>
<td style="text-align:left;"> ehiDGEN_5_mean </td>
<td style="text-align:right;"> 0.4125021 </td>
<td style="text-align:right;"> 0.6576471 </td>
<td style="text-align:right;"> 0.5754825 </td>
<td style="text-align:right;"> 0.3903632 </td>
<td style="text-align:right;"> 1.920963 </td>
<td style="text-align:right;"> 0.0273016 </td>
<td style="text-align:right;"> 0.0105384 </td>
<td style="text-align:right;"> 0.4162485 </td>
</tr>
<tr>
<td style="text-align:left;"> ehiDGEN_10_mean </td>
<td style="text-align:right;"> 0.4091906 </td>
<td style="text-align:right;"> 0.6003996 </td>
<td style="text-align:right;"> 0.5190903 </td>
<td style="text-align:right;"> 0.3337035 </td>
<td style="text-align:right;"> 1.502500 </td>
<td style="text-align:right;"> 0.0345773 </td>
<td style="text-align:right;"> 0.0161841 </td>
<td style="text-align:right;"> 0.2873396 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi5_global_mean </td>
<td style="text-align:right;"> 0.5572211 </td>
<td style="text-align:right;"> 0.6271436 </td>
<td style="text-align:right;"> 0.5440163 </td>
<td style="text-align:right;"> 0.3592479 </td>
<td style="text-align:right;"> 1.681998 </td>
<td style="text-align:right;"> 0.0259296 </td>
<td style="text-align:right;"> 0.0113815 </td>
<td style="text-align:right;"> 0.4162485 </td>
</tr>
<tr>
<td style="text-align:left;"> ehi10_global_mean </td>
<td style="text-align:right;"> 0.5282880 </td>
<td style="text-align:right;"> 0.5963510 </td>
<td style="text-align:right;"> 0.5041975 </td>
<td style="text-align:right;"> 0.3299683 </td>
<td style="text-align:right;"> 1.477400 </td>
<td style="text-align:right;"> 0.0271341 </td>
<td style="text-align:right;"> 0.0068450 </td>
<td style="text-align:right;"> 0.2873396 </td>
</tr>
</tbody>
</table></body></html>

File diff suppressed because it is too large Load Diff

View File

BIN
eohi2/Rplots.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,7 @@
,stdEHI_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
sex_dummy,0.079,,,,,
demo_age_1,-0.22***,-0.028,,,,
edu_num,-0.027,0.0094,-0.078,,,
aot_total,-0.028,-0.072,-0.20***,0.10*,,
crt_correct,0.11*,-0.16***,-0.0044,0.12**,-0.037,
crt_int,-0.12**,0.14**,0.028,-0.11*,-0.011,-0.88***
1 stdEHI_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
1 stdEHI_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
2 sex_dummy 0.079
3 demo_age_1 -0.22*** -0.028
4 edu_num -0.027 0.0094 -0.078
5 aot_total -0.028 -0.072 -0.20*** 0.10*
6 crt_correct 0.11* -0.16*** -0.0044 0.12** -0.037
7 crt_int -0.12** 0.14** 0.028 -0.11* -0.011 -0.88***

View File

@ -0,0 +1,7 @@
,stdEHI_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
sex_dummy,0.080650819,,,,,
demo_age_1,1.40537E-06,0.536013997,,,,
edu_num,0.551130545,0.835590798,0.086262901,,,
aot_total,0.545115793,0.112852544,5.87906E-06,0.025659604,,
crt_correct,0.011928378,0.000483116,0.923101803,0.00884239,0.4122211,
crt_int,0.007552737,0.001816488,0.533287266,0.012949289,0.810348767,9.5293E-160
1 stdEHI_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
1 stdEHI_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
2 sex_dummy 0.080650819
3 demo_age_1 1.40537E-06 0.536013997
4 edu_num 0.551130545 0.835590798 0.086262901
5 aot_total 0.545115793 0.112852544 5.87906E-06 0.025659604
6 crt_correct 0.011928378 0.000483116 0.923101803 0.00884239 0.4122211
7 crt_int 0.007552737 0.001816488 0.533287266 0.012949289 0.810348767 9.5293E-160

View File

@ -0,0 +1,100 @@
# Correlations between the standardized EHI mean and demographic/cognitive
# measures (eohi2 sample). Spearman correlations with unadjusted p-values;
# matrices are written out as CSV for reporting.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
df <- read.csv("eohi2.csv")
# Keep the analysis variables and drop participants who declined to report sex.
data <- df %>%
  filter(demo_sex != "Prefer not to say") %>%
  select(stdEHI_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int)
# Quick data-quality checks: missingness and column classes.
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- as.integer(data$demo_sex == "Female")
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
# ---- Descriptives ----
# Age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Sex (counts and proportions)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Sex dummy
print(table(data$sex_dummy))
# Recode education as an ordered factor, then as numeric 1/2/3 so it can
# enter the (rank-based) correlation matrix.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))
# ---- Correlation matrix ----
numeric_vars <- data %>%
  select(stdEHI_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int)
# Spearman correlations on complete cases.
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))
# Significance tests for the same correlations via psych::corr.test.
library(psych)
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
print(round(cor_test$r, 3))
print(round(cor_test$p, 3))
# Report every off-diagonal pair with its r and p value.
var_names <- colnames(numeric_vars)
for (row_i in seq_len(nrow(cor_test$r))) {
  for (col_j in seq_len(ncol(cor_test$r))) {
    if (row_i != col_j) { # Skip diagonal
      cat(var_names[row_i], "vs", var_names[col_j],
          ": r =", round(cor_test$r[row_i, col_j], 3),
          ", p =", round(cor_test$p[row_i, col_j], 3), "\n")
    }
  }
}
# Summary of just the significant (p < .05) correlations.
sig_idx <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
if (nrow(sig_idx) > 0) {
  for (k in seq_len(nrow(sig_idx))) {
    r_pos <- sig_idx[k, 1]
    c_pos <- sig_idx[k, 2]
    if (r_pos != c_pos) { # Skip diagonal
      cat(var_names[r_pos], "vs", var_names[c_pos],
          ": r =", round(cor_test$r[r_pos, c_pos], 3),
          ", p =", round(cor_test$p[r_pos, c_pos], 3), "\n")
    }
  }
}
# Persist the matrices for the write-up.
write.csv(cor_test$r, "STD_EHI_correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "STD_EHI_correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to STD_EHI_correlation_matrix.csv")
print("P-values saved to STD_EHI_correlation_pvalues.csv")

View File

@ -0,0 +1,63 @@
Variable1,Variable2,Spearman_r,P_value
ehiDGEN_5_Pers,aot_total,-0.136252077,0.002533005
ehi5_global_mean,crt_int,-0.111415172,0.013696124
ehi5_pref_MEAN,crt_int,-0.111269786,0.013820353
ehi5_pref_MEAN,crt_correct,0.092043471,0.041901166
ehi5_global_mean,crt_correct,0.091771139,0.042513801
,,,
ehi10_global_mean,crt_correct,0.086125328,0.057016745
ehi10_global_mean,crt_int,-0.077752437,0.085875618
ehi10_global_mean,aot_total,0.038353442,0.397405642
ehi10_pers_MEAN,crt_int,-0.078358781,0.083451324
ehi10_pers_MEAN,crt_correct,0.062086308,0.170454491
ehi10_pers_MEAN,aot_total,0.008627548,0.849074663
ehi10_pref_MEAN,crt_correct,0.076608495,0.090603647
ehi10_pref_MEAN,crt_int,-0.020948272,0.64400852
ehi10_pref_MEAN,aot_total,-0.006633556,0.883671698
ehi10_val_MEAN,crt_correct,0.05989196,0.186098498
ehi10_val_MEAN,crt_int,-0.057978747,0.200580439
ehi10_val_MEAN,aot_total,-0.003835901,0.932573186
ehi5.10_global_mean,crt_correct,0.033565462,0.458964083
ehi5.10_global_mean,aot_total,-0.031442998,0.487869173
ehi5.10_global_mean,crt_int,-0.023334683,0.606722562
ehi5.10_pers_MEAN,crt_correct,0.030973863,0.494387766
ehi5.10_pers_MEAN,crt_int,-0.02351475,0.603948546
ehi5.10_pers_MEAN,aot_total,-0.006387901,0.887950695
ehi5.10_pref_MEAN,aot_total,-0.012595122,0.781151691
ehi5.10_pref_MEAN,crt_correct,-0.011596204,0.798118255
ehi5.10_pref_MEAN,crt_int,0.00927269,0.837939503
ehi5.10_val_MEAN,crt_int,-0.02343742,0.60513915
ehi5.10_val_MEAN,aot_total,-0.01868822,0.680165418
ehi5.10_val_MEAN,crt_correct,0.016882557,0.709594395
ehi5_global_mean,aot_total,-0.012144664,0.788790373
ehi5_pers_MEAN,crt_correct,0.041879574,0.35541717
ehi5_pers_MEAN,crt_int,-0.038984568,0.38967884
ehi5_pers_MEAN,aot_total,-0.002320273,0.959183882
ehi5_pref_MEAN,aot_total,-0.009156402,0.839944313
ehi5_val_MEAN,crt_int,-0.060691086,0.180283214
ehi5_val_MEAN,aot_total,-0.043640227,0.335537257
ehi5_val_MEAN,crt_correct,0.038349744,0.397451186
ehiDGEN_10_mean,crt_int,-0.060692684,0.180271722
ehiDGEN_10_mean,crt_correct,0.048693261,0.282530257
ehiDGEN_10_mean,aot_total,-0.007003919,0.877226879
ehiDGEN_10_Pers,crt_int,-0.08321639,0.065963243
ehiDGEN_10_Pers,crt_correct,0.062811394,0.165507368
ehiDGEN_10_Pers,aot_total,-0.005213733,0.908446087
ehiDGEN_10_Pref,crt_correct,0.056920022,0.208938511
ehiDGEN_10_Pref,crt_int,-0.054154647,0.231949195
ehiDGEN_10_Pref,aot_total,0.012229383,0.787352177
ehiDGEN_10_Val,crt_correct,-0.046875479,0.300907385
ehiDGEN_10_Val,aot_total,-0.041722577,0.35722513
ehiDGEN_10_Val,crt_int,0.029931685,0.509033307
ehiDGEN_5_mean,aot_total,-0.068430961,0.130753688
ehiDGEN_5_mean,crt_correct,0.047639811,0.293085717
ehiDGEN_5_mean,crt_int,-0.042472828,0.348637399
ehiDGEN_5_Pers,crt_correct,0.061428736,0.175035746
ehiDGEN_5_Pers,crt_int,-0.038746148,0.392587023
ehiDGEN_5_Pref,crt_int,-0.042178047,0.351995885
ehiDGEN_5_Pref,aot_total,-0.038691604,0.393254174
ehiDGEN_5_Pref,crt_correct,0.033401162,0.461166903
ehiDGEN_5_Val,aot_total,-0.080834838,0.074119126
ehiDGEN_5_Val,crt_int,-0.024116805,0.594715088
ehiDGEN_5_Val,crt_correct,0.02187202,0.629462582
,,,
1 Variable1 Variable2 Spearman_r P_value
1 Variable1 Variable2 Spearman_r P_value
2 ehiDGEN_5_Pers aot_total -0.136252077 0.002533005
3 ehi5_global_mean crt_int -0.111415172 0.013696124
4 ehi5_pref_MEAN crt_int -0.111269786 0.013820353
5 ehi5_pref_MEAN crt_correct 0.092043471 0.041901166
6 ehi5_global_mean crt_correct 0.091771139 0.042513801
7
8 ehi10_global_mean crt_correct 0.086125328 0.057016745
9 ehi10_global_mean crt_int -0.077752437 0.085875618
10 ehi10_global_mean aot_total 0.038353442 0.397405642
11 ehi10_pers_MEAN crt_int -0.078358781 0.083451324
12 ehi10_pers_MEAN crt_correct 0.062086308 0.170454491
13 ehi10_pers_MEAN aot_total 0.008627548 0.849074663
14 ehi10_pref_MEAN crt_correct 0.076608495 0.090603647
15 ehi10_pref_MEAN crt_int -0.020948272 0.64400852
16 ehi10_pref_MEAN aot_total -0.006633556 0.883671698
17 ehi10_val_MEAN crt_correct 0.05989196 0.186098498
18 ehi10_val_MEAN crt_int -0.057978747 0.200580439
19 ehi10_val_MEAN aot_total -0.003835901 0.932573186
20 ehi5.10_global_mean crt_correct 0.033565462 0.458964083
21 ehi5.10_global_mean aot_total -0.031442998 0.487869173
22 ehi5.10_global_mean crt_int -0.023334683 0.606722562
23 ehi5.10_pers_MEAN crt_correct 0.030973863 0.494387766
24 ehi5.10_pers_MEAN crt_int -0.02351475 0.603948546
25 ehi5.10_pers_MEAN aot_total -0.006387901 0.887950695
26 ehi5.10_pref_MEAN aot_total -0.012595122 0.781151691
27 ehi5.10_pref_MEAN crt_correct -0.011596204 0.798118255
28 ehi5.10_pref_MEAN crt_int 0.00927269 0.837939503
29 ehi5.10_val_MEAN crt_int -0.02343742 0.60513915
30 ehi5.10_val_MEAN aot_total -0.01868822 0.680165418
31 ehi5.10_val_MEAN crt_correct 0.016882557 0.709594395
32 ehi5_global_mean aot_total -0.012144664 0.788790373
33 ehi5_pers_MEAN crt_correct 0.041879574 0.35541717
34 ehi5_pers_MEAN crt_int -0.038984568 0.38967884
35 ehi5_pers_MEAN aot_total -0.002320273 0.959183882
36 ehi5_pref_MEAN aot_total -0.009156402 0.839944313
37 ehi5_val_MEAN crt_int -0.060691086 0.180283214
38 ehi5_val_MEAN aot_total -0.043640227 0.335537257
39 ehi5_val_MEAN crt_correct 0.038349744 0.397451186
40 ehiDGEN_10_mean crt_int -0.060692684 0.180271722
41 ehiDGEN_10_mean crt_correct 0.048693261 0.282530257
42 ehiDGEN_10_mean aot_total -0.007003919 0.877226879
43 ehiDGEN_10_Pers crt_int -0.08321639 0.065963243
44 ehiDGEN_10_Pers crt_correct 0.062811394 0.165507368
45 ehiDGEN_10_Pers aot_total -0.005213733 0.908446087
46 ehiDGEN_10_Pref crt_correct 0.056920022 0.208938511
47 ehiDGEN_10_Pref crt_int -0.054154647 0.231949195
48 ehiDGEN_10_Pref aot_total 0.012229383 0.787352177
49 ehiDGEN_10_Val crt_correct -0.046875479 0.300907385
50 ehiDGEN_10_Val aot_total -0.041722577 0.35722513
51 ehiDGEN_10_Val crt_int 0.029931685 0.509033307
52 ehiDGEN_5_mean aot_total -0.068430961 0.130753688
53 ehiDGEN_5_mean crt_correct 0.047639811 0.293085717
54 ehiDGEN_5_mean crt_int -0.042472828 0.348637399
55 ehiDGEN_5_Pers crt_correct 0.061428736 0.175035746
56 ehiDGEN_5_Pers crt_int -0.038746148 0.392587023
57 ehiDGEN_5_Pref crt_int -0.042178047 0.351995885
58 ehiDGEN_5_Pref aot_total -0.038691604 0.393254174
59 ehiDGEN_5_Pref crt_correct 0.033401162 0.461166903
60 ehiDGEN_5_Val aot_total -0.080834838 0.074119126
61 ehiDGEN_5_Val crt_int -0.024116805 0.594715088
62 ehiDGEN_5_Val crt_correct 0.02187202 0.629462582
63

View File

@ -0,0 +1,14 @@
,ehiDGEN_5_mean,ehiDGEN_10_mean,ehi5_global_mean,ehi10_global_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
ehiDGEN_10_mean,0.42***,,,,,,,,
ehi5_global_mean,0.21***,0.21***,,,,,,,
ehi10_global_mean,0.19***,0.37***,0.43***,,,,,,
sex_dummy,0.074,0.053,0.023,0.051,,,,,
demo_age_1,-0.053,-0.22***,-0.21***,-0.26***,-0.028,,,,
edu_num,-0.031,-0.037,0.083,-0.073,0.0094,-0.078,,,
aot_total,-0.064,-0.0067,-0.0032,0.050,-0.072,-0.20***,0.10*,,
crt_correct,0.045,0.053,0.093*,0.090*,-0.16***,-0.0044,0.12**,-0.037,
crt_int,-0.041,-0.065,-0.11*,-0.083,0.14*,0.028,-0.11*,-0.011,-0.88***
,,,,,,,,,
*p<0.05,,,,,,,,,
**p<0.01,,,,,,,,,
***p<0.001,,,,,,,,,
1 ehiDGEN_5_mean ehiDGEN_10_mean ehi5_global_mean ehi10_global_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
1 ehiDGEN_5_mean ehiDGEN_10_mean ehi5_global_mean ehi10_global_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
2 ehiDGEN_10_mean 0.42***
3 ehi5_global_mean 0.21*** 0.21***
4 ehi10_global_mean 0.19*** 0.37*** 0.43***
5 sex_dummy 0.074 0.053 0.023 0.051
6 demo_age_1 -0.053 -0.22*** -0.21*** -0.26*** -0.028
7 edu_num -0.031 -0.037 0.083 -0.073 0.0094 -0.078
8 aot_total -0.064 -0.0067 -0.0032 0.050 -0.072 -0.20*** 0.10*
9 crt_correct 0.045 0.053 0.093* 0.090* -0.16*** -0.0044 0.12** -0.037
10 crt_int -0.041 -0.065 -0.11* -0.083 0.14* 0.028 -0.11* -0.011 -0.88***
11
12 *p<0.05
13 **p<0.01
14 ***p<0.001

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,10 @@
,ehiDGEN_5_mean,ehiDGEN_10_mean,ehi5_global_mean,ehi10_global_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
ehiDGEN_10_mean,2.9929E-22,,,,,,,,
ehi5_global_mean,4.14491E-06,3.02E-06,,,,,,,
ehi10_global_mean,1.61573E-05,1.66719E-17,1.63546E-23,,,,,,
sex_dummy,0.104170333,0.245085093,0.606827914,0.260991884,,,,,
demo_age_1,0.239362544,1.57395E-06,4.57184E-06,5.31327E-09,0.536013997,,,,
edu_num,0.489059792,0.41328311,0.067372333,0.10843647,0.835590798,0.086262901,,,
aot_total,0.156912095,0.882372703,0.943759329,0.269428383,0.112852544,5.87906E-06,0.025659604,,
crt_correct,0.319158511,0.24026811,0.039792024,0.047593264,0.000483116,0.923101803,0.00884239,0.4122211,
crt_int,0.365601388,0.150705909,0.01133869,0.065890723,0.001816488,0.533287266,0.012949289,0.810348767,9.5293E-160
1 ehiDGEN_5_mean ehiDGEN_10_mean ehi5_global_mean ehi10_global_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
1 ehiDGEN_5_mean ehiDGEN_10_mean ehi5_global_mean ehi10_global_mean sex_dummy demo_age_1 edu_num aot_total crt_correct
2 ehiDGEN_10_mean 2.9929E-22
3 ehi5_global_mean 4.14491E-06 3.02E-06
4 ehi10_global_mean 1.61573E-05 1.66719E-17 1.63546E-23
5 sex_dummy 0.104170333 0.245085093 0.606827914 0.260991884
6 demo_age_1 0.239362544 1.57395E-06 4.57184E-06 5.31327E-09 0.536013997
7 edu_num 0.489059792 0.41328311 0.067372333 0.10843647 0.835590798 0.086262901
8 aot_total 0.156912095 0.882372703 0.943759329 0.269428383 0.112852544 5.87906E-06 0.025659604
9 crt_correct 0.319158511 0.24026811 0.039792024 0.047593264 0.000483116 0.923101803 0.00884239 0.4122211
10 crt_int 0.365601388 0.150705909 0.01133869 0.065890723 0.001816488 0.533287266 0.012949289 0.810348767 9.5293E-160

View File

@ -0,0 +1,34 @@
Variable1,Variable2,Spearman_r,P_value
DGEN_10_global_mean,aot_total,0.393039545,0
DGEN_5_global_mean,aot_total,0.401732294,0
DGEN_fut_10_mean,aot_total,0.372705919,0
DGEN_fut_5_mean,aot_total,0.400202502,0
DGEN_past_10_mean,aot_total,0.368847521,0
DGEN_past_5_mean,aot_total,0.376820454,0
DGENfut_global_mean,aot_total,0.382481063,0
DGENpast_global_mean,aot_total,0.369258554,0
DGEN_fut_5_mean,crt_correct,-0.08823341,0.051182461
DGENfut_global_mean,crt_correct,-0.086607229,0.05563658
DGEN_5_global_mean,crt_correct,-0.079087177,0.080612427
DGEN_fut_10_mean,crt_correct,-0.073786402,0.103162718
DGEN_fut_5.10_mean,aot_total,0.06505662,0.150871574
DGEN_10_global_mean,crt_correct,-0.060769258,0.17972166
DGEN_5.10_global_mean,aot_total,0.058102985,0.199615824
DGEN_past_5_mean,crt_correct,-0.057080059,0.207659218
DGEN_fut_5.10_mean,crt_correct,-0.052644919,0.245242874
DGEN_past_5.10_mean,aot_total,0.050343754,0.266514845
DGEN_fut_5.10_mean,crt_int,0.049423543,0.275365524
DGENpast_global_mean,crt_correct,-0.048081298,0.288630365
DGEN_past_10_mean,crt_int,-0.042776663,0.345197086
DGEN_5.10_global_mean,crt_correct,-0.036910124,0.415418734
DGENpast_global_mean,crt_int,-0.035697633,0.430916171
DGEN_past_10_mean,crt_correct,-0.035562484,0.432664043
DGEN_past_5_mean,crt_int,-0.031740856,0.483754572
DGEN_10_global_mean,crt_int,-0.019205703,0.671817818
DGEN_past_5.10_mean,crt_int,-0.015826279,0.727015617
DGEN_5_global_mean,crt_int,-0.014881366,0.742720553
DGEN_past_5.10_mean,crt_correct,-0.013651301,0.763324994
DGEN_5.10_global_mean,crt_int,0.011869586,0.793465063
DGENfut_global_mean,crt_int,0.009764448,0.829473295
DGEN_fut_10_mean,crt_int,0.002368134,0.95834271
DGEN_fut_5_mean,crt_int,-0.0005323,0.990632398
1 Variable1 Variable2 Spearman_r P_value
1 Variable1 Variable2 Spearman_r P_value
2 DGEN_10_global_mean aot_total 0.393039545 0
3 DGEN_5_global_mean aot_total 0.401732294 0
4 DGEN_fut_10_mean aot_total 0.372705919 0
5 DGEN_fut_5_mean aot_total 0.400202502 0
6 DGEN_past_10_mean aot_total 0.368847521 0
7 DGEN_past_5_mean aot_total 0.376820454 0
8 DGENfut_global_mean aot_total 0.382481063 0
9 DGENpast_global_mean aot_total 0.369258554 0
10 DGEN_fut_5_mean crt_correct -0.08823341 0.051182461
11 DGENfut_global_mean crt_correct -0.086607229 0.05563658
12 DGEN_5_global_mean crt_correct -0.079087177 0.080612427
13 DGEN_fut_10_mean crt_correct -0.073786402 0.103162718
14 DGEN_fut_5.10_mean aot_total 0.06505662 0.150871574
15 DGEN_10_global_mean crt_correct -0.060769258 0.17972166
16 DGEN_5.10_global_mean aot_total 0.058102985 0.199615824
17 DGEN_past_5_mean crt_correct -0.057080059 0.207659218
18 DGEN_fut_5.10_mean crt_correct -0.052644919 0.245242874
19 DGEN_past_5.10_mean aot_total 0.050343754 0.266514845
20 DGEN_fut_5.10_mean crt_int 0.049423543 0.275365524
21 DGENpast_global_mean crt_correct -0.048081298 0.288630365
22 DGEN_past_10_mean crt_int -0.042776663 0.345197086
23 DGEN_5.10_global_mean crt_correct -0.036910124 0.415418734
24 DGENpast_global_mean crt_int -0.035697633 0.430916171
25 DGEN_past_10_mean crt_correct -0.035562484 0.432664043
26 DGEN_past_5_mean crt_int -0.031740856 0.483754572
27 DGEN_10_global_mean crt_int -0.019205703 0.671817818
28 DGEN_past_5.10_mean crt_int -0.015826279 0.727015617
29 DGEN_5_global_mean crt_int -0.014881366 0.742720553
30 DGEN_past_5.10_mean crt_correct -0.013651301 0.763324994
31 DGEN_5.10_global_mean crt_int 0.011869586 0.793465063
32 DGENfut_global_mean crt_int 0.009764448 0.829473295
33 DGEN_fut_10_mean crt_int 0.002368134 0.95834271
34 DGEN_fut_5_mean crt_int -0.0005323 0.990632398

View File

@ -0,0 +1,175 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Load required libraries
library(corrplot)
library(Hmisc)
library(psych)
# Load the data
exp2_data <- read.csv("eohi2.csv")
# Define the two sets of variables
set1_vars <- c("DGEN_past_5_mean", "DGEN_past_10_mean", "DGEN_fut_5_mean", "DGEN_fut_10_mean",
"DGEN_past_5.10_mean", "DGEN_fut_5.10_mean", "DGENpast_global_mean",
"DGENfut_global_mean", "DGEN_5_global_mean", "DGEN_10_global_mean", "DGEN_5.10_global_mean")
set2_vars <- c("aot_total", "crt_correct", "crt_int")
# Create subset with only the variables of interest
correlation_data <- exp2_data[, c(set1_vars, set2_vars)]
# ===== NORMALITY CHECKS =====
# Shapiro-Wilk tests for normality (n < 5000)
for(var in names(correlation_data)) {
if(length(na.omit(correlation_data[[var]])) <= 5000) {
shapiro_result <- shapiro.test(correlation_data[[var]])
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
var, shapiro_result$p.value,
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
}
}
# Visual normality checks
pdf("normality_plots_domain_general_vars.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))
for(var in names(correlation_data)) {
# Histogram with normal curve overlay
hist(correlation_data[[var]], main = paste("Histogram:", var),
xlab = var, freq = FALSE)
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
sd = sd(correlation_data[[var]], na.rm = TRUE)),
add = TRUE, col = "red", lwd = 2)
# Q-Q plot
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
qqline(correlation_data[[var]], col = "red", lwd = 2)
}
dev.off()
# ===== LINEARITY CHECKS =====
# Check linearity between variable pairs
pdf("linearity_plots_domain_general_vars.pdf", width = 15, height = 10)
par(mfrow = c(4, 3))
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
# Scatter plot with regression line
plot(correlation_data[[var1]], correlation_data[[var2]],
main = paste(var1, "vs", var2),
xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black")
# Add linear regression line
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
abline(lm_fit, col = "red", lwd = 2)
# Add LOESS smooth line for non-linear pattern detection
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
loess_pred <- predict(loess_fit, x_seq)
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
# Calculate R-squared for linear fit
r_squared <- summary(lm_fit)$r.squared
cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared))
}
}
dev.off()
# Residual analysis for linearity
pdf("residual_plots_domain_general_vars.pdf", width = 15, height = 10)
par(mfrow = c(4, 3))
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
residuals <- residuals(lm_fit)
fitted <- fitted(lm_fit)
plot(fitted, residuals,
main = paste("Residuals:", var1, "vs", var2),
xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black")
abline(h = 0, col = "red", lwd = 2)
# Add smooth line to residuals
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
}
}
dev.off()
# Calculate Spearman correlation matrix only
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
# Print correlation matrix with 5 decimal places
print(round(cor_matrix_spearman, 5))
# Separate correlations between the two sets (Spearman)
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))
# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
# Statistical significance tests (Spearman)
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
p_val <- cor_test_results_spearman$P[var1, var2]
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
}
}
# Create correlation plot for Spearman only
pdf("correlation_plot_domain_general_vars_spearman.pdf", width = 10, height = 8)
corrplot(cor_matrix_spearman, method = "color", type = "upper",
order = "hclust", tl.cex = 0.8, tl.col = "black",
addCoef.col = "black", number.cex = 0.7,
title = "Spearman Correlation Matrix: Domain-General Vars vs Cognitive Measures")
dev.off()
# Summary statistics
desc_stats <- describe(correlation_data)
print(round(desc_stats, 5))
# Save results to CSV files
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_general_vars.csv")
write.csv(round(desc_stats, 5), "descriptive_statistics_domain_general_vars.csv")
# Save correlation results in a formatted table
cor_results <- data.frame(
Variable1 = character(),
Variable2 = character(),
Spearman_r = numeric(),
P_value = numeric(),
stringsAsFactors = FALSE
)
# Extract correlations between sets
for(i in 1:length(set1_vars)) {
for(j in 1:length(set2_vars)) {
var1 <- set1_vars[i]
var2 <- set2_vars[j]
r_val <- cor_matrix_spearman[var1, var2]
p_val <- cor_test_results_spearman$P[var1, var2]
cor_results <- rbind(cor_results, data.frame(
Variable1 = var1,
Variable2 = var2,
Spearman_r = r_val,
P_value = p_val,
stringsAsFactors = FALSE
))
}
}
write.csv(cor_results, "correlations - domain general vars.csv", row.names = FALSE)

View File

@ -0,0 +1,34 @@
Variable1,Variable2,Spearman_r,P_value
N10_global_mean,aot_total,0.150441363,0.000846088
N5_global_mean,aot_total,0.185910923,3.52323E-05
NFut_10_mean,aot_total,0.155140023,0.000575829
NFut_5_mean,aot_total,0.199536342,8.75208E-06
NFut_global_mean,aot_total,0.185343328,3.72583E-05
NPast_10_mean,aot_total,0.143227088,0.001495728
NPast_5_mean,aot_total,0.150454018,0.000845224
NPast_global_mean,aot_total,0.148341676,0.001001318
X5.10_global_mean,aot_total,0.206551087,4.11273E-06
X5.10fut_mean,aot_total,0.208127536,3.45821E-06
X5.10past_mean,aot_total,0.168898536,0.000175189
NFut_5_mean,crt_correct,-0.055312717,0.222103555
NPast_5_mean,crt_int,-0.053658543,0.236260102
NPast_global_mean,crt_int,-0.043976695,0.33182097
NPast_10_mean,crt_int,-0.043338736,0.33888983
NFut_global_mean,crt_correct,-0.036977473,0.414567649
X5.10fut_mean,crt_int,-0.030131358,0.506209892
NPast_5_mean,crt_correct,0.028880988,0.524024755
NFut_5_mean,crt_int,0.027093379,0.550039031
X5.10_global_mean,crt_int,-0.025449064,0.574514939
NPast_10_mean,crt_correct,0.021023332,0.642821387
NPast_global_mean,crt_correct,0.020816029,0.646102235
NFut_10_mean,crt_correct,-0.020486277,0.651335162
X5.10past_mean,crt_int,-0.018645297,0.680859581
N5_global_mean,crt_correct,-0.018075161,0.690105495
N10_global_mean,crt_int,-0.016544862,0.71514808
NFut_10_mean,crt_int,-0.009395086,0.835830518
NFut_global_mean,crt_int,0.008419817,0.852666611
N10_global_mean,crt_correct,-0.008133995,0.857613839
N5_global_mean,crt_int,-0.007330571,0.87154938
X5.10fut_mean,crt_correct,-0.005165193,0.909294754
X5.10_global_mean,crt_correct,-0.004538036,0.920269031
X5.10past_mean,crt_correct,0.003021813,0.94685912
1 Variable1 Variable2 Spearman_r P_value
1 Variable1 Variable2 Spearman_r P_value
2 N10_global_mean aot_total 0.150441363 0.000846088
3 N5_global_mean aot_total 0.185910923 3.52323E-05
4 NFut_10_mean aot_total 0.155140023 0.000575829
5 NFut_5_mean aot_total 0.199536342 8.75208E-06
6 NFut_global_mean aot_total 0.185343328 3.72583E-05
7 NPast_10_mean aot_total 0.143227088 0.001495728
8 NPast_5_mean aot_total 0.150454018 0.000845224
9 NPast_global_mean aot_total 0.148341676 0.001001318
10 X5.10_global_mean aot_total 0.206551087 4.11273E-06
11 X5.10fut_mean aot_total 0.208127536 3.45821E-06
12 X5.10past_mean aot_total 0.168898536 0.000175189
13 NFut_5_mean crt_correct -0.055312717 0.222103555
14 NPast_5_mean crt_int -0.053658543 0.236260102
15 NPast_global_mean crt_int -0.043976695 0.33182097
16 NPast_10_mean crt_int -0.043338736 0.33888983
17 NFut_global_mean crt_correct -0.036977473 0.414567649
18 X5.10fut_mean crt_int -0.030131358 0.506209892
19 NPast_5_mean crt_correct 0.028880988 0.524024755
20 NFut_5_mean crt_int 0.027093379 0.550039031
21 X5.10_global_mean crt_int -0.025449064 0.574514939
22 NPast_10_mean crt_correct 0.021023332 0.642821387
23 NPast_global_mean crt_correct 0.020816029 0.646102235
24 NFut_10_mean crt_correct -0.020486277 0.651335162
25 X5.10past_mean crt_int -0.018645297 0.680859581
26 N5_global_mean crt_correct -0.018075161 0.690105495
27 N10_global_mean crt_int -0.016544862 0.71514808
28 NFut_10_mean crt_int -0.009395086 0.835830518
29 NFut_global_mean crt_int 0.008419817 0.852666611
30 N10_global_mean crt_correct -0.008133995 0.857613839
31 N5_global_mean crt_int -0.007330571 0.87154938
32 X5.10fut_mean crt_correct -0.005165193 0.909294754
33 X5.10_global_mean crt_correct -0.004538036 0.920269031
34 X5.10past_mean crt_correct 0.003021813 0.94685912

View File

@ -0,0 +1,197 @@
options(scipen = 999)  # suppress scientific notation in printed/written output
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Load required libraries
library(corrplot)
library(Hmisc)
# NOTE(review): psych is loaded after Hmisc; both export describe(), so
# psych::describe() is the one used later in this script -- confirm intended
library(psych)
# Load the data
exp2_data <- read.csv("eohi2.csv")
# Define the two sets of variables
# Set 1: rating-scale mean columns; Set 2: cognitive measures (presumably AOT
# total and CRT correct/intuitive scores -- verify against the codebook)
set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean",
"X5.10past_mean", "X5.10fut_mean", "NPast_global_mean",
"NFut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean")
set2_vars <- c("aot_total", "crt_correct", "crt_int")
# Create subset with only the variables of interest
correlation_data <- exp2_data[, c(set1_vars, set2_vars)]
# ===== NORMALITY CHECKS =====
# Shapiro-Wilk tests for normality (the test is only defined for n < 5000)
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  if (length(na.omit(vals)) <= 5000) {
    sw <- shapiro.test(vals)
    verdict <- if (sw$p.value < 0.05) "(NOT normal)" else "(normal)"
    cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", v, sw$p.value, verdict))
  }
}
# Kolmogorov-Smirnov test for normality, against a normal distribution with
# the sample's own mean and SD
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  ks <- ks.test(vals, "pnorm",
                mean = mean(vals, na.rm = TRUE),
                sd = sd(vals, na.rm = TRUE))
  verdict <- if (ks$p.value < 0.05) "(NOT normal)" else "(normal)"
  cat(sprintf("%s: KS p = %.5f %s\n", v, ks$p.value, verdict))
}
# Visual normality checks: for each variable, a density histogram with the
# fitted normal curve overlaid, followed by a Q-Q plot
pdf("normality_plots_domain_vars.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  hist(vals, main = paste("Histogram:", v), xlab = v, freq = FALSE)
  curve(dnorm(x, mean = mean(vals, na.rm = TRUE),
              sd = sd(vals, na.rm = TRUE)),
        add = TRUE, col = "red", lwd = 2)
  qqnorm(vals, main = paste("Q-Q Plot:", v))
  qqline(vals, col = "red", lwd = 2)
}
dev.off()
# ===== LINEARITY CHECKS =====
# Scatter plot for every set1 x set2 pair with a linear fit (red, solid) and a
# LOESS smooth (blue, dashed); divergence between the two lines suggests a
# non-linear relationship. The linear R^2 for each pair is printed as well.
pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10)
par(mfrow = c(4, 3))
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    # Scatter plot with regression line
    plot(correlation_data[[var1]], correlation_data[[var2]],
         main = paste(var1, "vs", var2),
         xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black")
    # Add linear regression line
    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
    abline(lm_fit, col = "red", lwd = 2)
    # Add LOESS smooth line for non-linear pattern detection
    loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
    x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
                 max(correlation_data[[var1]], na.rm = TRUE), length = 100)
    loess_pred <- predict(loess_fit, x_seq)
    lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
    # Calculate R-squared for linear fit
    r_squared <- summary(lm_fit)$r.squared
    cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared))
  }
}
dev.off()
# Residual-vs-fitted plots for the same pairs; curvature in the blue lowess
# line indicates the linearity assumption is violated.
pdf("residual_plots_domain_vars.pdf", width = 15, height = 10)
par(mfrow = c(4, 3))
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
    # FIX: these locals were named 'residuals' and 'fitted', shadowing the
    # base R generics residuals() and fitted() for the rest of the session.
    resid_vals <- residuals(lm_fit)
    fitted_vals <- fitted(lm_fit)
    plot(fitted_vals, resid_vals,
         main = paste("Residuals:", var1, "vs", var2),
         xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black")
    abline(h = 0, col = "red", lwd = 2)
    # Add smooth line to residuals
    lines(lowess(fitted_vals, resid_vals), col = "blue", lwd = 2)
  }
}
dev.off()
# Calculate correlation matrices (both Pearson and Spearman).
# FIX: cor() defaults to use = "everything", so a single NA anywhere in a
# column would turn that column's entire row/column of the matrix into NA.
# Pairwise deletion also matches how Hmisc::rcorr() computes p-values below.
cor_matrix_pearson <- cor(correlation_data, method = "pearson", use = "pairwise.complete.obs")
cor_matrix_spearman <- cor(correlation_data, method = "spearman", use = "pairwise.complete.obs")
# Use Spearman as primary method
cor_matrix <- cor_matrix_spearman
# Print correlation matrices with 5 decimal places
print(round(cor_matrix_spearman, 5))
# Separate correlations between the two sets (Spearman)
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))
# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
# Statistical significance tests (Spearman); rcorr() returns a $P matrix of
# pairwise p-values (pairwise deletion of missing observations)
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    p_val <- cor_test_results_spearman$P[var1, var2]
    cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
  }
}
# Create correlation plots for both methods
pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8)
corrplot(cor_matrix_spearman, method = "color", type = "upper",
         order = "hclust", tl.cex = 0.8, tl.col = "black",
         addCoef.col = "black", number.cex = 0.7,
         title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures")
dev.off()
pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8)
corrplot(cor_matrix_pearson, method = "color", type = "upper",
         order = "hclust", tl.cex = 0.8, tl.col = "black",
         addCoef.col = "black", number.cex = 0.7,
         title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures")
dev.off()
# Summary statistics (describe() -- psych masks Hmisc here given load order)
desc_stats <- describe(correlation_data)
print(round(desc_stats, 5))
# Save results to CSV files
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv")
write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv")
write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv")
# Save the set1-vs-set2 Spearman correlations and their p-values as a
# long-format table (one row per variable pair) and write it to CSV.
# Rows are accumulated in a pre-sized list and bound once with do.call(rbind);
# the original grew the data.frame with rbind() inside the double loop, which
# is quadratic in the number of rows. Output rows keep the same order
# (outer loop over set1_vars, inner loop over set2_vars).
row_list <- vector("list", length(set1_vars) * length(set2_vars))
k <- 0
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    k <- k + 1
    row_list[[k]] <- data.frame(
      Variable1 = var1,
      Variable2 = var2,
      Spearman_r = cor_matrix_spearman[var1, var2],
      P_value = cor_test_results_spearman$P[var1, var2],
      stringsAsFactors = FALSE
    )
  }
}
cor_results <- do.call(rbind, row_list)
write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE)

View File

@ -0,0 +1,176 @@
options(scipen = 999)  # suppress scientific notation in printed/written output
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Load required libraries
library(corrplot)
library(Hmisc)
# NOTE(review): psych is loaded after Hmisc; both export describe(), so
# psych::describe() is the one used later in this script -- confirm intended
library(psych)
# Load the data
# NOTE(review): variable is named exp1_data but the file read is eohi2.csv --
# confirm the intended experiment/file pairing
exp1_data <- read.csv("eohi2.csv")
# Define the two sets of variables
# Set 1: EOHI/DGEN scale columns; Set 2: cognitive measures
set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val",
"ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val",
"ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN",
"ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN",
"ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN",
"ehiDGEN_5_mean", "ehiDGEN_10_mean",
"ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean")
set2_vars <- c("aot_total", "crt_correct", "crt_int")
# Create subset with only the variables of interest
correlation_data <- exp1_data[, c(set1_vars, set2_vars)]
# ===== NORMALITY CHECKS =====
# Shapiro-Wilk test for every analysis variable
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  sw <- shapiro.test(vals)
  verdict <- if (sw$p.value < 0.05) "(NOT normal)" else "(normal)"
  cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n", v, sw$p.value, verdict))
}
# Visual normality checks: for each variable, a density histogram with the
# fitted normal curve overlaid, followed by a Q-Q plot
pdf("normality_plots.pdf", width = 12, height = 8)
par(mfrow = c(2, 4))
for (v in colnames(correlation_data)) {
  vals <- correlation_data[[v]]
  hist(vals, main = paste("Histogram:", v), xlab = v, freq = FALSE)
  curve(dnorm(x, mean = mean(vals, na.rm = TRUE),
              sd = sd(vals, na.rm = TRUE)),
        add = TRUE, col = "red", lwd = 2)
  qqnorm(vals, main = paste("Q-Q Plot:", v))
  qqline(vals, col = "red", lwd = 2)
}
dev.off()
# ===== LINEARITY CHECKS =====
# Scatter plot for every set1 x set2 pair with a linear fit (red, solid) and a
# LOESS smooth (blue, dashed); divergence between the two lines suggests a
# non-linear relationship. The linear R^2 for each pair is printed as well.
pdf("linearity_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    # Scatter plot with regression line
    plot(correlation_data[[var1]], correlation_data[[var2]],
         main = paste(var1, "vs", var2),
         xlab = var1, ylab = var2, pch = 16, cex = 0.6)
    # Add linear regression line
    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
    abline(lm_fit, col = "red", lwd = 2)
    # Add LOESS smooth line for non-linear pattern detection
    loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
    x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
                 max(correlation_data[[var1]], na.rm = TRUE), length = 100)
    loess_pred <- predict(loess_fit, x_seq)
    lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
    # Calculate R-squared for linear fit
    r_squared <- summary(lm_fit)$r.squared
    cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
  }
}
dev.off()
# Residual-vs-fitted plots for the same pairs; curvature in the blue lowess
# line indicates the linearity assumption is violated.
pdf("residual_plots.pdf", width = 15, height = 10)
par(mfrow = c(3, 5))
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
    # FIX: these locals were named 'residuals' and 'fitted', shadowing the
    # base R generics residuals() and fitted() for the rest of the session.
    resid_vals <- residuals(lm_fit)
    fitted_vals <- fitted(lm_fit)
    plot(fitted_vals, resid_vals,
         main = paste("Residuals:", var1, "vs", var2),
         xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
    abline(h = 0, col = "red", lwd = 2)
    # Add smooth line to residuals
    lines(lowess(fitted_vals, resid_vals), col = "blue", lwd = 2)
  }
}
dev.off()
# Calculate correlation matrix (Spearman only).
# FIX: cor() defaults to use = "everything", so a single NA anywhere in a
# column would turn that column's entire row/column of the matrix into NA.
# Pairwise deletion also matches how Hmisc::rcorr() computes p-values below.
cor_matrix_spearman <- cor(correlation_data, method = "spearman", use = "pairwise.complete.obs")
# Print correlation matrix with 5 decimal places
print(round(cor_matrix_spearman, 5))
# Separate correlations between the two sets (Spearman)
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
print(round(set1_set2_cor, 5))
# Calculate correlations within each set (Spearman)
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
# Statistical significance tests (Spearman); rcorr() returns a $P matrix of
# pairwise p-values (pairwise deletion of missing observations)
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    p_val <- cor_test_results_spearman$P[var1, var2]
    cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
  }
}
# Create correlation plot (Spearman only)
pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8)
corrplot(cor_matrix_spearman, method = "color", type = "upper",
         order = "hclust", tl.cex = 0.8, tl.col = "black",
         addCoef.col = "black", number.cex = 0.7,
         title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures")
dev.off()
# Summary statistics (describe() -- psych masks Hmisc here given load order)
desc_stats <- describe(correlation_data)
print(round(desc_stats, 5))
# Save results to CSV files
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv")
write.csv(round(desc_stats, 5), "descriptive_statistics.csv")
# Save the set1-vs-set2 Spearman correlations and their p-values as a
# long-format table (one row per variable pair) and write it to CSV.
# Rows are accumulated in a pre-sized list and bound once with do.call(rbind);
# the original grew the data.frame with rbind() inside the double loop, which
# is quadratic in the number of rows. Output rows keep the same order
# (outer loop over set1_vars, inner loop over set2_vars).
row_list <- vector("list", length(set1_vars) * length(set2_vars))
k <- 0
for (i in seq_along(set1_vars)) {
  for (j in seq_along(set2_vars)) {
    var1 <- set1_vars[i]
    var2 <- set2_vars[j]
    k <- k + 1
    row_list[[k]] <- data.frame(
      Variable1 = var1,
      Variable2 = var2,
      Spearman_r = cor_matrix_spearman[var1, var2],
      P_value = cor_test_results_spearman$P[var1, var2],
      stringsAsFactors = FALSE
    )
  }
}
cor_results <- do.call(rbind, row_list)
write.csv(cor_results, "correlationCORRECT_exp2.csv", row.names = FALSE)

View File

@ -0,0 +1,266 @@
# Script to combine and recode Likert scale items in eohi2.csv
# Combines 01 and 02 versions of items, then recodes text to numeric values
# Load necessary library
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
# (the comment previously said na.strings="" which did not match the code)
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define the mapping function.
# Maps Likert response text (any letter case) to its numeric score.
#   x: character vector of responses.
# Returns a numeric vector the same length as x, scored -3 ("strongly
# disagree") .. 3 ("strongly agree"); unrecognized text, empty strings, and
# NA input all yield NA.
recode_likert <- function(x) {
  # A named-vector lookup replaces the seven-branch dplyr::case_when():
  # tolower() runs once on the whole vector instead of once per branch, and
  # the function no longer depends on dplyr at all.
  scale_map <- c(
    "strongly disagree" = -3,
    "disagree" = -2,
    "somewhat disagree" = -1,
    "neither agree nor disagree" = 0,
    "somewhat agree" = 1,
    "agree" = 2,
    "strongly agree" = 3
  )
  unname(scale_map[tolower(x)])
}
# Define source column pairs (Set A and Set B)
# NOTE: source_cols_A[i], source_cols_B[i], and target_cols[i] must stay
# positionally aligned -- index i identifies one item across all three vectors
source_cols_A <- c(
"01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5",
"01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5",
"01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5",
"01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5",
"01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5",
"01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5",
"01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5",
"01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5",
"01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5",
"01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5",
"01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5",
"01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5"
)
source_cols_B <- c(
"02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5",
"02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5",
"02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5",
"02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5",
"02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5",
"02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5",
"02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5",
"02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5",
"02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5",
"02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5",
"02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5",
"02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5"
)
# Define target column names
target_cols <- c(
"past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel",
"past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex",
"past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice",
"past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel",
"past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex",
"past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice",
"fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel",
"fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex",
"fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice",
"fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel",
"fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex",
"fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice"
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Print first 30 actual column names for debugging
cat("First 30 actual column names in CSV:\n")
for (i in 1:min(30, length(df_cols))) {
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
}
cat("\n")
# Check Source A columns
missing_A <- source_cols_A[!source_cols_A %in% df_cols]
existing_A <- source_cols_A[source_cols_A %in% df_cols]
cat("Source Set A:\n")
cat(" Expected: 60 columns\n")
cat(" Found:", length(existing_A), "columns\n")
cat(" Missing:", length(missing_A), "columns\n")
if (length(missing_A) > 0) {
cat("\n Missing columns from Set A:\n")
for (col in missing_A) {
cat(" -", col, "\n")
}
}
# Check Source B columns
missing_B <- source_cols_B[!source_cols_B %in% df_cols]
existing_B <- source_cols_B[source_cols_B %in% df_cols]
cat("\nSource Set B:\n")
cat(" Expected: 60 columns\n")
cat(" Found:", length(existing_B), "columns\n")
cat(" Missing:", length(missing_B), "columns\n")
if (length(missing_B) > 0) {
cat("\n Missing columns from Set B:\n")
for (col in missing_B) {
cat(" -", col, "\n")
}
}
# Check for columns with similar names (potential typos/spaces)
if (length(missing_A) > 0 || length(missing_B) > 0) {
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
all_missing <- c(missing_A, missing_B)
for (miss_col in all_missing) {
# Find columns that start with similar pattern
# (first 10 characters of the missing name, matched case-insensitively)
pattern <- substr(miss_col, 1, 10)
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
if (length(similar) > 0) {
cat("\nLooking for:", miss_col)
cat("\n Similar columns found:\n")
for (sim in similar) {
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
}
}
}
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# (aborts only when more than half of a 60-column source set is absent)
if (length(missing_A) > 30 || length(missing_B) > 30) {
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# Combine each Set A / Set B source pair into its recoded numeric target
# column. Per row, the Set A value wins when it is non-missing and non-empty;
# otherwise the Set B value is used (a simple coalesce), and the merged text
# is then recoded to its numeric Likert score.
col_or_na <- function(d, nm) if (nm %in% names(d)) d[[nm]] else rep(NA, nrow(d))
for (k in seq_along(target_cols)) {
  vals_a <- col_or_na(df, source_cols_A[k])
  vals_b <- col_or_na(df, source_cols_B[k])
  take_a <- !is.na(vals_a) & vals_a != ""
  df[[target_cols[k]]] <- recode_likert(ifelse(take_a, vals_a, vals_b))
  # Print progress
  cat("Processed:", target_cols[k], "\n")
}
# ============= VERIFY TARGET COLUMNS WERE CREATED =============
cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
# Get updated column names
df_cols_after <- trimws(names(df))
# Check which target columns exist
existing_targets <- target_cols[target_cols %in% df_cols_after]
missing_targets <- target_cols[!target_cols %in% df_cols_after]
cat("Target Columns:\n")
cat(" Expected: 60 columns\n")
cat(" Created:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n WARNING: The following target columns were NOT created:\n")
for (col in missing_targets) {
cat(" -", col, "\n")
}
stop("\nERROR: Not all target columns were created successfully!")
} else {
cat("\n SUCCESS: All 60 target columns created successfully!\n")
}
cat("\n=== END VERIFICATION ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows.
# For one randomly chosen row it prints each A/B source value next to the
# recoded numeric target, so the coalesce + recode can be eyeballed.
# Reads the script-level df, source_cols_A, source_cols_B, and target_cols.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# Check each of the 60 pairs
for (i in 1:60) {
col_A <- source_cols_A[i]
col_B <- source_cols_B[i]
target_col <- target_cols[i]
# Get values
val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
target_val <- df[random_row, target_col]
# Determine which source had the value (same precedence as the merge
# loop above: Set A wins when non-missing and non-empty)
has_val_A <- !is.na(val_A) && val_A != ""
has_val_B <- !is.na(val_B) && val_B != ""
if (has_val_A) {
source_used <- "A"
original_text <- val_A
} else if (has_val_B) {
source_used <- "B"
original_text <- val_B
} else {
source_used <- "NONE"
original_text <- "(empty)"
}
# Print the info
cat(sprintf("Pair %2d:\n", i))
cat(sprintf(" Source A: %-30s\n", col_A))
cat(sprintf(" Source B: %-30s\n", col_B))
cat(sprintf(" Target: %-30s\n", target_col))
cat(sprintf(" Value found in: Source %s\n", source_used))
cat(sprintf(" Original text: '%s'\n", original_text))
cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# NOTE(review): this overwrites the input file eohi2.csv in place; consider
# writing to a new filename so the raw export is preserved
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\nProcessing complete! 60 new columns added to eohi2.csv\n")

View File

@ -0,0 +1,192 @@
# Script to recode present-time Likert scale items in eohi2.csv
# Recodes prePrefItem, prePersItem, and preValItem to numeric values
# Load necessary library
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
# df holds the raw export; 15 recoded numeric columns are added further down
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define the mapping function.
# Maps Likert response text (any letter case) to its numeric score.
#   x: character vector of responses.
# Returns a numeric vector the same length as x, scored -3 ("strongly
# disagree") .. 3 ("strongly agree"); unrecognized text, empty strings, and
# NA input all yield NA.
recode_likert <- function(x) {
  # A named-vector lookup replaces the seven-branch dplyr::case_when():
  # tolower() runs once on the whole vector instead of once per branch, and
  # the function no longer depends on dplyr at all.
  scale_map <- c(
    "strongly disagree" = -3,
    "disagree" = -2,
    "somewhat disagree" = -1,
    "neither agree nor disagree" = 0,
    "somewhat agree" = 1,
    "agree" = 2,
    "strongly agree" = 3
  )
  unname(scale_map[tolower(x)])
}
# Define source columns (15 columns total)
# NOTE: source_cols[i] and target_cols[i] must stay positionally aligned --
# index i identifies one item across both vectors
source_cols <- c(
"prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5",
"prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5",
"preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5"
)
# Define target column names (15 columns total)
target_cols <- c(
"present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel",
"present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex",
"present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice"
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Print first 30 actual column names for debugging
cat("First 30 actual column names in CSV:\n")
for (i in 1:min(30, length(df_cols))) {
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
}
cat("\n")
# Check Source columns
missing_source <- source_cols[!source_cols %in% df_cols]
existing_source <- source_cols[source_cols %in% df_cols]
cat("Source Columns:\n")
cat(" Expected: 15 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")
if (length(missing_source) > 0) {
cat("\n Missing columns:\n")
for (col in missing_source) {
cat(" -", col, "\n")
}
}
# Check for columns with similar names (potential typos/spaces)
if (length(missing_source) > 0) {
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
for (miss_col in missing_source) {
# Find columns that start with similar pattern
# (first 10 characters of the missing name, matched case-insensitively)
pattern <- substr(miss_col, 1, 10)
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
if (length(similar) > 0) {
cat("\nLooking for:", miss_col)
cat("\n Similar columns found:\n")
for (sim in similar) {
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
}
}
}
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# (aborts when more than 7 of the 15 source columns are absent)
if (length(missing_source) > 7) {
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# Check if target columns exist in the dataframe
cat("\n=== CHECKING TARGET COLUMNS ===\n")
existing_targets <- target_cols[target_cols %in% df_cols]
missing_targets <- target_cols[!target_cols %in% df_cols]
cat("Target Columns:\n")
cat(" Expected: 15 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n Target columns do NOT exist yet - will create them.\n")
if (length(existing_targets) > 0) {
cat(" WARNING: Some target columns already exist and will be overwritten.\n")
}
} else {
cat(" All target columns exist - will overwrite with recoded values.\n")
}
cat("\n")
# Recode each present-time source column into its numeric target column,
# overwriting any target column that already exists under that name.
for (k in seq_along(target_cols)) {
  src <- source_cols[k]
  # Missing source columns fall back to an all-NA vector of the right length
  raw_vals <- if (src %in% names(df)) df[[src]] else rep(NA, nrow(df))
  df[[target_cols[k]]] <- recode_likert(raw_vals)
  # Print progress
  cat("Processed:", target_cols[k], "\n")
}
cat("\n=== RECODING COMPLETE ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows.
# For one randomly chosen row it prints each source value next to its recoded
# numeric target so the mapping can be eyeballed. Reads the script-level df,
# source_cols, and target_cols.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# Check each of the 15 columns
for (i in 1:15) {
source_col <- source_cols[i]
target_col <- target_cols[i]
# Get values
source_val <- if (source_col %in% names(df)) df[random_row, source_col] else ""
target_val <- df[random_row, target_col]
# Determine if source has a value (empty string counts as no value)
has_val <- !is.na(source_val) && source_val != ""
original_text <- if (has_val) source_val else "(empty)"
# Print the info
cat(sprintf("Column %2d:\n", i))
cat(sprintf(" Source: %-30s\n", source_col))
cat(sprintf(" Target: %-30s\n", target_col))
cat(sprintf(" Original text: '%s'\n", original_text))
cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# BUG FIX: the write.csv() call below had been left active even though this
# comment and the messages printed further down all state that the write is
# disabled for review -- so the script was silently overwriting eohi2.csv in
# place. Disabled to match the stated review-mode intent.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment line 189 to save changes.\n")
cat("\nProcessing complete! 15 new columns created (not yet saved to file).\n")

View File

@ -0,0 +1,253 @@
# Script to combine DGEN variables in eohi2.csv
# Combines 01 and 02 versions of DGEN items (no recoding, just copying values)
# Load necessary library
# NOTE(review): only base R functions appear to be used in this script —
# confirm the dplyr dependency is actually needed.
library(dplyr)
# NOTE(review): hard-coded absolute path; script is tied to this machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define source column pairs (Set A and Set B)
# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header
# The three vectors below are INDEX-ALIGNED: element i of source_cols_A,
# source_cols_B and target_cols together describe one combined variable.
# Do not reorder one without reordering all three.
source_cols_A <- c(
"01past5PrefDGEN_1",
"01past5PersDGEN_1",
"01past5ValDGEN_1",
"01past10PrefDGEN_1",
"01past10PersDGEN_1",
"01past10ValDGEN_1",
"01fut5PrefDGEN_8",
"01fut5PersDGEN_8",
"01fut5ValuesDGEN_1",
"01fut10PrefDGEN_8",
"01fut10PersDGEN_8",
"01fut10ValuesDGEN_1"
)
# NOTE(review): Set B uses "Val" where Set A's fut items use "Values"
# (e.g. 02fut5ValDGEN_1 vs 01fut5ValuesDGEN_1) — per the comment above this
# reflects the actual CSV header; verify against the file if columns go missing.
source_cols_B <- c(
"02past5PrefDGEN_1",
"02past5PersDGEN_1",
"02past5ValDGEN_1",
"02past10PrefDGEN_1",
"02past10PersDGEN_1",
"02past10ValDGEN_1",
"02fut5PrefDGEN_8",
"02fut5PersDGEN_8",
"02fut5ValDGEN_1",
"02fut10PrefDGEN_8",
"02fut10PersDGEN_8",
"02fut10ValDGEN_1"
)
# Define target column names
target_cols <- c(
"DGEN_past_5_Pref",
"DGEN_past_5_Pers",
"DGEN_past_5_Val",
"DGEN_past_10_Pref",
"DGEN_past_10_Pers",
"DGEN_past_10_Val",
"DGEN_fut_5_Pref",
"DGEN_fut_5_Pers",
"DGEN_fut_5_Val",
"DGEN_fut_10_Pref",
"DGEN_fut_10_Pers",
"DGEN_fut_10_Val"
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Diagnostic report: verifies the expected source columns exist in the CSV,
# lists anything missing, and tries to suggest near-miss column names.
# Aborts (stop) only if more than half of a set is missing.
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
# Note: df itself keeps its original (untrimmed) names; df_cols is only
# used for the membership checks below.
df_cols <- trimws(names(df))
# Print first 30 actual column names for debugging
cat("First 30 actual column names in CSV:\n")
for (i in 1:min(30, length(df_cols))) {
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
}
cat("\n")
# Check Source A columns
missing_A <- source_cols_A[!source_cols_A %in% df_cols]
existing_A <- source_cols_A[source_cols_A %in% df_cols]
cat("Source Set A:\n")
cat(" Expected: 12 columns\n")
cat(" Found:", length(existing_A), "columns\n")
cat(" Missing:", length(missing_A), "columns\n")
if (length(missing_A) > 0) {
cat("\n Missing columns from Set A:\n")
for (col in missing_A) {
cat(" -", col, "\n")
}
}
# Check Source B columns
missing_B <- source_cols_B[!source_cols_B %in% df_cols]
existing_B <- source_cols_B[source_cols_B %in% df_cols]
cat("\nSource Set B:\n")
cat(" Expected: 12 columns\n")
cat(" Found:", length(existing_B), "columns\n")
cat(" Missing:", length(missing_B), "columns\n")
if (length(missing_B) > 0) {
cat("\n Missing columns from Set B:\n")
for (col in missing_B) {
cat(" -", col, "\n")
}
}
# Check for columns with similar names (potential typos/spaces)
if (length(missing_A) > 0 || length(missing_B) > 0) {
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
all_missing <- c(missing_A, missing_B)
for (miss_col in all_missing) {
# Find columns that start with similar pattern
# NOTE(review): the 10-char prefix is used as an (unescaped) regex via
# grep(); safe for these alphanumeric/underscore names, but would misfire
# if a column name ever contained regex metacharacters.
pattern <- substr(miss_col, 1, 10)
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
if (length(similar) > 0) {
cat("\nLooking for:", miss_col)
cat("\n Similar columns found:\n")
for (sim in similar) {
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
}
}
}
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# Threshold of 6 = half of each 12-column set.
if (length(missing_A) > 6 || length(missing_B) > 6) {
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# Build each combined DGEN column: take the Set A (01*) response when it is
# non-missing and non-empty, otherwise fall back to the Set B (02*) response.
# Values are copied verbatim — no recoding is applied.
for (idx in seq_len(12)) {
  a_col <- source_cols_A[idx]
  b_col <- source_cols_B[idx]
  out_col <- target_cols[idx]
  # A source column absent from the CSV behaves like an all-NA column of the
  # right length, so the fallback logic still lines up row-by-row.
  a_vals <- if (a_col %in% names(df)) df[[a_col]] else rep(NA, nrow(df))
  b_vals <- if (b_col %in% names(df)) df[[b_col]] else rep(NA, nrow(df))
  # Element-wise coalesce: prefer A wherever A holds an actual response.
  prefer_a <- !is.na(a_vals) & a_vals != ""
  df[[out_col]] <- ifelse(prefer_a, a_vals, b_vals)
  # Progress report
  cat("Processed:", out_col, "\n")
}
# ============= VERIFY TARGET COLUMNS WERE CREATED =============
# Post-condition check: all 12 target columns must now exist in df,
# otherwise the script aborts before anything is written to disk.
cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
# Get updated column names
df_cols_after <- trimws(names(df))
# Check which target columns exist
existing_targets <- target_cols[target_cols %in% df_cols_after]
missing_targets <- target_cols[!target_cols %in% df_cols_after]
cat("Target Columns:\n")
cat(" Expected: 12 columns\n")
cat(" Created:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n WARNING: The following target columns were NOT created:\n")
for (col in missing_targets) {
cat(" -", col, "\n")
}
# Hard stop: a partial result must not reach the write.csv further below.
stop("\nERROR: Not all target columns were created successfully!")
} else {
cat("\n SUCCESS: All 12 target columns created successfully!\n")
}
cat("\n=== END VERIFICATION ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows
#
# Spot-check: for one randomly chosen row, prints each of the 12 A/B/target
# triples, reports which source (A or B) supplied the value, and shows the
# original vs. target value so the coalesce logic can be verified by eye.
# Relies on script-level globals: df, source_cols_A, source_cols_B, target_cols.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# Check each of the 12 pairs
for (i in 1:12) {
col_A <- source_cols_A[i]
col_B <- source_cols_B[i]
target_col <- target_cols[i]
# Get values
# A missing source column is treated as an empty response.
val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
target_val <- df[random_row, target_col]
# Determine which source had the value
# Mirrors the A-first preference used when the target columns were built.
has_val_A <- !is.na(val_A) && val_A != ""
has_val_B <- !is.na(val_B) && val_B != ""
if (has_val_A) {
source_used <- "A"
original_value <- val_A
} else if (has_val_B) {
source_used <- "B"
original_value <- val_B
} else {
source_used <- "NONE"
original_value <- "(empty)"
}
# Print the info
cat(sprintf("Pair %2d:\n", i))
cat(sprintf(" Source A: %-30s\n", col_A))
cat(sprintf(" Source B: %-30s\n", col_B))
cat(sprintf(" Target: %-30s\n", target_col))
cat(sprintf(" Value found in: Source %s\n", source_used))
cat(sprintf(" Original value: '%s'\n", original_value))
cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# NOTE: unlike the sibling scripts, this one intentionally writes the result
# immediately — the write is active and the completion message below agrees.
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\nProcessing complete! 12 new columns added to eohi2.csv\n")

View File

@ -0,0 +1,183 @@
# Script to calculate DGEN means by time period in eohi2.csv
# Averages the 3 domain scores (Pref, Pers, Val) for each time period
# Load necessary library
# NOTE(review): only base R functions appear to be used below — confirm
# the dplyr dependency is needed.
library(dplyr)
# NOTE(review): hard-coded absolute path; script is tied to this machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define source columns (12 total)
# These are the combined DGEN columns produced by the upstream combine script.
source_cols <- c(
"DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
"DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
"DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
"DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"
)
# Define target columns (4 total)
target_cols <- c(
"DGEN_past_5_mean",
"DGEN_past_10_mean",
"DGEN_fut_5_mean",
"DGEN_fut_10_mean"
)
# Define groupings: each target gets 3 source columns
# Keys must match target_cols exactly — the calculation loop indexes this
# list by target name.
source_groups <- list(
DGEN_past_5_mean = c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val"),
DGEN_past_10_mean = c("DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val"),
DGEN_fut_5_mean = c("DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val"),
DGEN_fut_10_mean = c("DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Diagnostic report: confirms the 12 source columns exist and whether the
# 4 target columns already exist (they will be overwritten if so).
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Check Source columns
missing_source <- source_cols[!source_cols %in% df_cols]
existing_source <- source_cols[source_cols %in% df_cols]
cat("Source Columns:\n")
cat(" Expected: 12 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")
if (length(missing_source) > 0) {
cat("\n Missing source columns:\n")
for (col in missing_source) {
cat(" -", col, "\n")
}
}
# Check Target columns
missing_targets <- target_cols[!target_cols %in% df_cols]
existing_targets <- target_cols[target_cols %in% df_cols]
cat("\nTarget Columns:\n")
cat(" Expected: 4 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n Target columns do NOT exist yet - will create them.\n")
if (length(existing_targets) > 0) {
cat(" WARNING: Some target columns already exist and will be overwritten.\n")
}
} else {
cat(" All target columns exist - will overwrite with calculated values.\n")
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# Threshold of 6 = half of the 12 expected source columns.
if (length(missing_source) > 6) {
stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= CALCULATE MEANS =============
# Each DGEN time-period mean is the row-wise average of its three domain
# scores (Pref, Pers, Val); missing items are dropped from the average.
cat("Calculating DGEN means by time period...\n")
# Coerce every source column that actually exists to numeric before averaging.
for (numeric_col in intersect(source_cols, names(df))) {
  df[[numeric_col]] <- as.numeric(df[[numeric_col]])
}
# Compute each target from whichever of its three domain columns are present.
for (target_name in target_cols) {
  group_cols <- intersect(source_groups[[target_name]], names(df))
  if (length(group_cols) == 0) {
    cat(" WARNING: No source columns found for", target_name, "\n")
  } else {
    # na.rm = TRUE: average over however many domain scores were answered.
    df[[target_name]] <- rowMeans(df[, group_cols, drop = FALSE], na.rm = TRUE)
    cat(" Processed:", target_name, "\n")
  }
}
cat("\n=== CALCULATION COMPLETE ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows
#
# Spot-check: recomputes each of the 4 DGEN means by hand for one random row
# and compares against the stored target value (tolerance 1e-4).
# Relies on script-level globals: df, target_cols, source_groups.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# Check each of the 4 target columns
for (target in target_cols) {
source_group <- source_groups[[target]]
cat(sprintf("Target: %s\n", target))
cat(" Source columns:\n")
# Get values from source columns
# Source columns were converted to numeric earlier, so these are scalars.
values <- numeric(3)
for (i in 1:3) {
col <- source_group[i]
val <- if (col %in% names(df)) df[random_row, col] else NA
values[i] <- val
cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
}
# Calculate expected mean
# Mirrors rowMeans(..., na.rm = TRUE): NA items are excluded.
valid_values <- values[!is.na(values)]
if (length(valid_values) > 0) {
expected_mean <- mean(valid_values)
actual_value <- df[random_row, target]
cat(sprintf("\n Calculation:\n"))
cat(sprintf(" Sum: %s = %.5f\n", paste(valid_values, collapse = " + "), sum(valid_values)))
cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean))
cat(sprintf(" Target value: %.5f\n", actual_value))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
} else {
cat(" No valid values to calculate mean.\n")
}
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# Here the write really is disabled, consistent with the messages below.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment line 163 to save changes.\n")
cat("\nProcessing complete! 4 DGEN mean columns calculated (not yet saved to file).\n")

View File

@ -0,0 +1,298 @@
# Script to compute AOT and CRT scales in eohi2.csv
# AOT: Reverse codes items 4-7, then averages all 8 items
# CRT: Calculates proportion of correct and intuitive responses
# Load necessary library
# NOTE(review): only base R functions appear to be used below — confirm
# the dplyr dependency is needed.
library(dplyr)
# NOTE(review): hard-coded absolute path; script is tied to this machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define source columns
aot_cols <- c("aot_1", "aot_2", "aot_3", "aot_4", "aot_5", "aot_6", "aot_7", "aot_8")
crt_cols <- c("crt_1", "crt_2", "crt_3")
# Define target columns
target_cols <- c("aot_total", "crt_correct", "crt_int")
# Define correct and intuitive CRT answers
# Element j is the answer key for crt_cols[j]; responses are compared after
# trimws() + tolower(), so matching is case-insensitive but otherwise exact.
crt_correct_answers <- c("5 cents", "5 minutes", "47 days")
crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days")
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Diagnostic report: confirms the AOT (8), CRT (3) and target (3) columns
# exist; aborts only past per-set thresholds (see stop() below).
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Check AOT columns
missing_aot <- aot_cols[!aot_cols %in% df_cols]
existing_aot <- aot_cols[aot_cols %in% df_cols]
cat("AOT Source Columns:\n")
cat(" Expected: 8 columns\n")
cat(" Found:", length(existing_aot), "columns\n")
cat(" Missing:", length(missing_aot), "columns\n")
if (length(missing_aot) > 0) {
cat("\n Missing AOT columns:\n")
for (col in missing_aot) {
cat(" -", col, "\n")
}
}
# Check CRT columns
missing_crt <- crt_cols[!crt_cols %in% df_cols]
existing_crt <- crt_cols[crt_cols %in% df_cols]
cat("\nCRT Source Columns:\n")
cat(" Expected: 3 columns\n")
cat(" Found:", length(existing_crt), "columns\n")
cat(" Missing:", length(missing_crt), "columns\n")
if (length(missing_crt) > 0) {
cat("\n Missing CRT columns:\n")
for (col in missing_crt) {
cat(" -", col, "\n")
}
}
# Check target columns
# Targets are created by this script, so "missing" here is expected on a
# first run; they are only reported, not required.
missing_targets <- target_cols[!target_cols %in% df_cols]
existing_targets <- target_cols[target_cols %in% df_cols]
cat("\nTarget Columns:\n")
cat(" Expected: 3 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n Missing target columns:\n")
for (col in missing_targets) {
cat(" -", col, "\n")
}
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) {
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= PROCESS AOT SCALE =============
cat("Processing AOT scale...\n")
# Convert AOT columns to numeric (non-numeric entries become NA with a warning)
for (col in aot_cols) {
  if (col %in% names(df)) {
    df[[col]] <- as.numeric(df[[col]])
  }
}
# Calculate the AOT total as the mean of all answered items, with items 4-7
# reverse coded (multiplied by -1) for the calculation only. The original
# item values in df are left untouched.
#
# BUG FIX: the previous version reversed fixed POSITIONS 4:7 of whatever
# subset of AOT columns happened to exist, so if any of aot_1..aot_3 were
# missing (permitted by the check above, which allows up to 4 missing) the
# wrong items were reversed. The reverse-coded items are now selected by
# column NAME, which is correct regardless of which columns are present.
existing_aot_cols <- aot_cols[aot_cols %in% names(df)]
reverse_cols <- intersect(paste0("aot_", 4:7), existing_aot_cols)
df$aot_total <- apply(df[, existing_aot_cols, drop = FALSE], 1, function(row) {
  # Row arrives as a numeric vector (all AOT columns were coerced above).
  values <- as.numeric(row)
  names(values) <- existing_aot_cols
  values[reverse_cols] <- values[reverse_cols] * -1
  # na.rm = TRUE: average over however many items were answered.
  mean(values, na.rm = TRUE)
})
cat(" AOT total scores calculated (items 4-7 reverse coded for calculation only).\n")
cat(" Original AOT item values preserved in dataframe.\n\n")
# ============= PROCESS CRT SCALES =============
# For each respondent: the proportion of answered CRT items matching the
# correct answer key, and the proportion matching the intuitive (lure) key.
# Rows with no usable responses keep NA in both columns.
cat("Processing CRT scales...\n")
# Initialize CRT columns
df$crt_correct <- NA
df$crt_int <- NA
# Score each respondent in a single pass over the three items.
for (row_idx in seq_len(nrow(df))) {
  n_answered <- 0
  n_correct <- 0
  n_intuitive <- 0
  for (item in 1:3) {
    item_col <- crt_cols[item]
    if (!(item_col %in% names(df))) next
    # Normalize the free-text response before comparing to the answer keys.
    answer <- trimws(tolower(as.character(df[row_idx, item_col])))
    if (is.na(answer) || answer == "") next
    n_answered <- n_answered + 1
    if (answer == tolower(crt_correct_answers[item])) {
      n_correct <- n_correct + 1
    }
    if (answer == tolower(crt_intuitive_answers[item])) {
      n_intuitive <- n_intuitive + 1
    }
  }
  # Both proportions share the same denominator: the count of non-empty
  # responses (identical to the two separate counters in the original code).
  if (n_answered > 0) {
    df$crt_correct[row_idx] <- n_correct / n_answered
    df$crt_int[row_idx] <- n_intuitive / n_answered
  }
}
cat(" CRT correct and intuitive scores calculated.\n\n")
cat("=== PROCESSING COMPLETE ===\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
# This function can be run multiple times to check different random rows
#
# Spot-check: recomputes aot_total, crt_correct and crt_int by hand for one
# random row and compares against the stored values (tolerance 1e-4).
# Relies on script-level globals: df, aot_cols, crt_cols,
# crt_correct_answers, crt_intuitive_answers.
qa_check_random_row <- function() {
# Pick a random row
random_row <- sample(1:nrow(df), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
cat("========================================\n\n")
# AOT Check
cat("--- AOT SCALE ---\n")
cat("Source values (original in CSV):\n")
# NOTE(review): aot_original is filled but never read afterwards — only
# aot_for_calc feeds the recomputation below.
aot_original <- numeric(8)
aot_for_calc <- numeric(8)
for (i in 1:8) {
col <- aot_cols[i]
val <- if (col %in% names(df)) df[random_row, col] else NA
aot_original[i] <- val
# Apply reversal for items 4-7
if (i %in% 4:7) {
aot_for_calc[i] <- val * -1
cat(sprintf(" %s: %s (reversed to %s for calculation)\n",
col,
ifelse(is.na(val), "NA", as.character(val)),
ifelse(is.na(val), "NA", as.character(val * -1))))
} else {
aot_for_calc[i] <- val
cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
}
}
# Manual calculation check
# Mirrors the production calculation: mean over non-NA reversed values.
valid_aot <- aot_for_calc[!is.na(aot_for_calc)]
if (length(valid_aot) > 0) {
expected_mean <- mean(valid_aot)
actual_value <- df$aot_total[random_row]
cat(sprintf("\nCalculation check:\n"))
cat(sprintf(" Sum of reversed values: %s\n", paste(valid_aot, collapse = " + ")))
cat(sprintf(" Average of %d valid items: %.5f\n", length(valid_aot), expected_mean))
cat(sprintf(" Target value (aot_total): %.5f\n", actual_value))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
} else {
cat("\n No valid AOT values to calculate.\n")
}
# CRT Check
cat("\n--- CRT SCALE ---\n")
cat("Source values:\n")
crt_correct_count <- 0
crt_int_count <- 0
crt_n <- 0
for (i in 1:3) {
col <- crt_cols[i]
val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
# Same normalization as the production scoring: trim + lowercase.
val_trimmed <- trimws(tolower(val))
correct_ans <- crt_correct_answers[i]
intuitive_ans <- crt_intuitive_answers[i]
is_correct <- val_trimmed == tolower(correct_ans)
is_intuitive <- val_trimmed == tolower(intuitive_ans)
if (val_trimmed != "" && !is.na(val_trimmed)) {
crt_n <- crt_n + 1
if (is_correct) crt_correct_count <- crt_correct_count + 1
if (is_intuitive) crt_int_count <- crt_int_count + 1
}
cat(sprintf(" %s: '%s'\n", col, val))
cat(sprintf(" Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct")))
cat(sprintf(" Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive")))
}
cat("\nCalculation check:\n")
if (crt_n > 0) {
expected_correct <- crt_correct_count / crt_n
expected_int <- crt_int_count / crt_n
actual_correct <- df$crt_correct[random_row]
actual_int <- df$crt_int[random_row]
cat(sprintf(" Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct))
cat(sprintf(" Target value (crt_correct): %.5f\n", actual_correct))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗")))
cat(sprintf("\n Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int))
cat(sprintf(" Target value (crt_int): %.5f\n", actual_int))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗")))
} else {
cat(" No valid CRT responses to calculate.\n")
}
cat("\n========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat(" qa_check_random_row()\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# BUG FIX: write.csv() was left active even though the comment above and the
# messages printed below both state the write is commented out — the CSV was
# being silently overwritten during review. The call is now actually disabled
# so the code matches its stated intent.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment line 253 to save changes.\n")
cat("\nProcessing complete! AOT and CRT scales calculated (not yet saved to file).\n")

View File

@ -0,0 +1,292 @@
# Script to calculate absolute differences between time intervals in eohi2.csv
# Compares present vs past/future, and 5-year vs 10-year intervals
# Load necessary library
# NOTE(review): only base R functions appear to be used below — confirm
# the dplyr dependency is needed.
library(dplyr)
# NOTE(review): hard-coded absolute path; script is tied to this machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
# Read the data (with check.names=FALSE to preserve original column names)
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
# Define the 15 item names (same order for all time periods)
items <- c(
"pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
"pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
"val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)
# Note: present uses lowercase "tv", others use uppercase "TV"
# items_present must stay index-aligned with items — element i refers to the
# same survey item in both vectors.
items_present <- c(
"pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
"pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
"val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)
# Define all source columns (75 total)
# 5 time periods x 15 items.
source_cols <- c(
paste0("present_", items_present),
paste0("past_5_", items),
paste0("past_10_", items),
paste0("fut_5_", items),
paste0("fut_10_", items)
)
# Define all target columns (90 total = 6 calculation types × 15 items)
target_NPast_5 <- paste0("NPast_5_", items)
target_NPast_10 <- paste0("NPast_10_", items)
target_NFut_5 <- paste0("NFut_5_", items)
target_NFut_10 <- paste0("NFut_10_", items)
target_5_10past <- paste0("5.10past_", items)
target_5_10fut <- paste0("5.10fut_", items)
target_cols <- c(
target_NPast_5,
target_NPast_10,
target_NFut_5,
target_NFut_10,
target_5_10past,
target_5_10fut
)
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Diagnostic report: confirms the 75 source columns exist and whether the
# 90 target columns already exist (they will be overwritten if so).
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
df_cols <- trimws(names(df))
# Check Source columns
missing_source <- source_cols[!source_cols %in% df_cols]
existing_source <- source_cols[source_cols %in% df_cols]
cat("Source Columns:\n")
cat(" Expected: 75 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")
# Only list individual missing names when the list is short enough to read.
if (length(missing_source) > 0 && length(missing_source) <= 10) {
cat("\n Missing source columns:\n")
for (col in missing_source) {
cat(" -", col, "\n")
}
} else if (length(missing_source) > 10) {
cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
}
# Check Target columns
missing_targets <- target_cols[!target_cols %in% df_cols]
existing_targets <- target_cols[target_cols %in% df_cols]
cat("\nTarget Columns:\n")
cat(" Expected: 90 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
cat("\n Target columns do NOT exist yet - will create them.\n")
if (length(existing_targets) > 0) {
cat(" WARNING: Some target columns already exist and will be overwritten.\n")
}
} else {
cat(" All target columns exist - will overwrite with calculated values.\n")
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# Threshold of 30 missing out of 75 expected source columns.
if (length(missing_source) > 30) {
stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= CALCULATE DIFFERENCES =============
# Builds all 90 absolute-difference columns: for each of the 15 items,
# |present - past_5|, |present - past_10|, |present - fut_5|,
# |present - fut_10|, |past_5 - past_10| and |fut_5 - fut_10|.
cat("Calculating time interval differences...\n")
# Convert source columns to numeric (non-numeric entries become NA)
for (col in source_cols) {
  if (col %in% names(df)) {
    df[[col]] <- as.numeric(df[[col]])
  }
}
# Helper: element-wise absolute difference between two columns of df.
# IMPROVEMENT: a missing column now yields a full-length NA vector instead of
# a scalar NA (the previous scalar relied on recycling to reach the same
# result); the return value is always one number per row.
calc_abs_diff <- function(col1, col2) {
  val1 <- if (col1 %in% names(df)) df[[col1]] else rep(NA_real_, nrow(df))
  val2 <- if (col2 %in% names(df)) df[[col2]] else rep(NA_real_, nrow(df))
  abs(val1 - val2)
}
# CONSOLIDATION: the six copy-pasted loops are now driven by one spec table.
# Each spec names the 15 target columns, the two source prefixes with their
# item vectors (present_* uses items_present for the lowercase "tv" item),
# and the exact progress message the original printed.
diff_specs <- list(
  list(targets = target_NPast_5,
       prefix1 = "present_", items1 = items_present,
       prefix2 = "past_5_", items2 = items,
       msg = " Calculating NPast_5 differences (present vs past 5 years)...\n"),
  list(targets = target_NPast_10,
       prefix1 = "present_", items1 = items_present,
       prefix2 = "past_10_", items2 = items,
       msg = " Calculating NPast_10 differences (present vs past 10 years)...\n"),
  list(targets = target_NFut_5,
       prefix1 = "present_", items1 = items_present,
       prefix2 = "fut_5_", items2 = items,
       msg = " Calculating NFut_5 differences (present vs future 5 years)...\n"),
  list(targets = target_NFut_10,
       prefix1 = "present_", items1 = items_present,
       prefix2 = "fut_10_", items2 = items,
       msg = " Calculating NFut_10 differences (present vs future 10 years)...\n"),
  list(targets = target_5_10past,
       prefix1 = "past_5_", items1 = items,
       prefix2 = "past_10_", items2 = items,
       msg = " Calculating 5.10past differences (past 5 vs past 10 years)...\n"),
  list(targets = target_5_10fut,
       prefix1 = "fut_5_", items1 = items,
       prefix2 = "fut_10_", items2 = items,
       msg = " Calculating 5.10fut differences (future 5 vs future 10 years)...\n")
)
for (spec in diff_specs) {
  cat(spec$msg)
  for (i in 1:15) {
    df[[spec$targets[i]]] <- calc_abs_diff(
      paste0(spec$prefix1, spec$items1[i]),
      paste0(spec$prefix2, spec$items2[i])
    )
  }
}
cat("\n=== CALCULATION COMPLETE ===\n")
cat(" 90 difference columns created.\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK =============
# This function can be run multiple times to check different random rows and items
#
# Spot-check: for one row and one of the 15 items (both random unless
# specified), recomputes all six absolute differences by hand and compares
# against the stored target columns (tolerance 1e-4).
# Relies on script-level globals: df, items, items_present and the six
# target_* vectors.
#
# row_num  - optional 1-based row index; NULL picks a random row.
# item_num - optional item index 1..15; NULL picks a random item.
qa_check_random_row <- function(row_num = NULL, item_num = NULL) {
# Pick a random row or use specified row
if (is.null(row_num)) {
random_row <- sample(seq_len(nrow(df)), 1)
cat("\n========================================\n")
cat("QA CHECK: Random Row #", random_row, "\n")
} else {
if (row_num < 1 || row_num > nrow(df)) {
cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
return()
}
random_row <- row_num
cat("\n========================================\n")
cat("QA CHECK: Specified Row #", random_row, "\n")
}
# Pick a random item or use specified item
if (is.null(item_num)) {
test_item_idx <- sample(1:15, 1)
cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n")
} else {
if (item_num < 1 || item_num > 15) {
cat("ERROR: Item number must be between 1 and 15\n")
return()
}
test_item_idx <- item_num
cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n")
}
cat("========================================\n\n")
# One entry per difference type; mirrors the production calculation specs.
calculations <- list(
list(name = "NPast_5", target = target_NPast_5[test_item_idx],
source1 = paste0("present_", items_present[test_item_idx]),
source2 = paste0("past_5_", items[test_item_idx]),
desc = "|present - past_5|"),
list(name = "NPast_10", target = target_NPast_10[test_item_idx],
source1 = paste0("present_", items_present[test_item_idx]),
source2 = paste0("past_10_", items[test_item_idx]),
desc = "|present - past_10|"),
list(name = "NFut_5", target = target_NFut_5[test_item_idx],
source1 = paste0("present_", items_present[test_item_idx]),
source2 = paste0("fut_5_", items[test_item_idx]),
desc = "|present - fut_5|"),
list(name = "NFut_10", target = target_NFut_10[test_item_idx],
source1 = paste0("present_", items_present[test_item_idx]),
source2 = paste0("fut_10_", items[test_item_idx]),
desc = "|present - fut_10|"),
list(name = "5.10past", target = target_5_10past[test_item_idx],
source1 = paste0("past_5_", items[test_item_idx]),
source2 = paste0("past_10_", items[test_item_idx]),
desc = "|past_5 - past_10|"),
list(name = "5.10fut", target = target_5_10fut[test_item_idx],
source1 = paste0("fut_5_", items[test_item_idx]),
source2 = paste0("fut_10_", items[test_item_idx]),
desc = "|fut_5 - fut_10|")
)
for (calc in calculations) {
cat(sprintf("--- %s ---\n", calc$name))
cat(sprintf("Formula: %s\n", calc$desc))
val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA
val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA
target_val <- df[random_row, calc$target]
cat(sprintf(" %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1))))
cat(sprintf(" %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2))))
if (!is.na(val1) && !is.na(val2)) {
expected_diff <- abs(val1 - val2)
cat(sprintf("\n Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff))
cat(sprintf(" Target (%s): %.5f\n", calc$target, target_val))
cat(sprintf(" Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗")))
} else {
cat(" Cannot calculate (missing values)\n")
}
cat("\n")
}
cat("========================================\n")
cat("END QA CHECK\n")
cat("========================================\n\n")
}
# Run QA check on random row and random item
cat("\n\n")
qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below)
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER ROW/ITEM ***\n")
cat("For random row AND random item, run:\n")
cat(" qa_check_random_row()\n")
cat("\nFor specific row (e.g., row 118) with random item:\n")
cat(" qa_check_random_row(118)\n")
cat("\nFor random row with specific item (e.g., item 5 = pref_travel):\n")
cat(" qa_check_random_row(item_num = 5)\n")
cat("\nFor specific row AND specific item:\n")
cat(" qa_check_random_row(118, 5)\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# Here the write really is disabled, consistent with the messages below.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment line 243 to save changes.\n")
cat("\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n")

View File

@ -0,0 +1,265 @@
# Script to calculate domain means for time interval differences in eohi2.csv
# Averages the 5 items within each domain (pref, pers, val) for each time interval type

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data (check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of NA)
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# The 15 item names (same order for all time periods)
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# Domain groupings: positions of each domain's items within `items`
pref_indices <- 1:5
pers_indices <- 6:10
val_indices <- 11:15

# Time interval prefixes and domain names
time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut")
domain_names <- c("pref", "pers", "val")

# All 90 source columns: every (time prefix, item) pair, grouped by prefix
source_cols <- unlist(lapply(time_prefixes, function(prefix) paste0(prefix, "_", items)))

# All 18 target columns (6 time intervals x 3 domains), grouped by prefix
target_cols <- unlist(lapply(time_prefixes, function(prefix) {
  paste0(prefix, "_", domain_names, "_MEAN")
}))
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
# Reports how many expected source/target columns are present in df before
# any calculation, so missing or misnamed CSV headers are caught early.
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
# Get actual column names from dataframe (trimmed)
# NOTE(review): only df's names are trimmed; source_cols/target_cols are
# compared untrimmed — fine as long as those vectors contain no stray spaces.
df_cols <- trimws(names(df))
# Check Source columns
missing_source <- source_cols[!source_cols %in% df_cols]
existing_source <- source_cols[source_cols %in% df_cols]
cat("Source Columns:\n")
cat(" Expected: 90 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")
# List individual missing columns only when the list is short enough to read
if (length(missing_source) > 0 && length(missing_source) <= 20) {
  cat("\n Missing source columns:\n")
  for (col in missing_source) {
    cat(" -", col, "\n")
  }
} else if (length(missing_source) > 20) {
  cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
}
# Check Target columns
missing_targets <- target_cols[!target_cols %in% df_cols]
existing_targets <- target_cols[target_cols %in% df_cols]
cat("\nTarget Columns:\n")
cat(" Expected: 18 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")
if (length(missing_targets) > 0) {
  cat("\n Target columns do NOT exist yet - will create them.\n")
  if (length(existing_targets) > 0) {
    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
  }
} else {
  cat(" All target columns exist - will overwrite with calculated values.\n")
}
cat("\n=== END CHECK ===\n\n")
# Stop if critical columns are missing
# (more than half of the 90 source columns absent is treated as fatal)
if (length(missing_source) > 45) {
  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}
cat("Proceeding with processing...\n\n")
# ============= CALCULATE DOMAIN MEANS =============
cat("Calculating domain means for time interval differences...\n")
# Coerce every source column that is actually present in df to numeric
present_sources <- intersect(source_cols, names(df))
if (length(present_sources) > 0) {
  df[present_sources] <- lapply(df[present_sources], as.numeric)
}
# Calculate means for each time interval × domain combination
# REFACTOR: one loop over the three domains replaces the three copy-pasted
# per-domain branches; the created columns and console output are unchanged.
domain_index_sets <- list(pref = pref_indices, pers = pers_indices, val = val_indices)
for (time_prefix in time_prefixes) {
  for (domain in names(domain_index_sets)) {
    # The five source item columns for this (interval, domain) pair
    src_cols <- paste0(time_prefix, "_", items[domain_index_sets[[domain]]])
    existing_cols <- src_cols[src_cols %in% names(df)]
    # Skip silently when no source columns exist (matches original behavior)
    if (length(existing_cols) > 0) {
      target <- paste0(time_prefix, "_", domain, "_MEAN")
      # Row-wise mean, ignoring NAs (all-NA rows yield NaN)
      df[[target]] <- rowMeans(df[, existing_cols, drop = FALSE], na.rm = TRUE)
      cat(" Processed:", target, "\n")
    }
  }
}
cat("\n=== CALCULATION COMPLETE ===\n")
cat(" 18 domain mean columns created.\n\n")
# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK =============
# This function can be run multiple times to check different random rows and time intervals
#
# Recomputes each domain's *_MEAN for one row from its five source items and
# compares it to the stored value, printing a YES/NO match per domain.
#
# Arguments:
#   row_num           - 1-based row to check; NULL picks a random row.
#   time_interval_num - 1..6 index into time_prefixes; NULL picks at random.
#
# Relies on globals defined earlier in this script: df, time_prefixes,
# domain_names, items, pref_indices, pers_indices, val_indices.
qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) {
  # Pick a random row or use specified row
  if (is.null(row_num)) {
    random_row <- sample(seq_len(nrow(df)), 1)
    cat("\n========================================\n")
    cat("QA CHECK: Random Row #", random_row, "\n")
  } else {
    # Reject out-of-range row numbers instead of erroring on the subset below
    if (row_num < 1 || row_num > nrow(df)) {
      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
      return()
    }
    random_row <- row_num
    cat("\n========================================\n")
    cat("QA CHECK: Specified Row #", random_row, "\n")
  }
  # Pick a random time interval or use specified interval
  if (is.null(time_interval_num)) {
    test_interval_idx <- sample(1:6, 1)
    cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n")
  } else {
    if (time_interval_num < 1 || time_interval_num > 6) {
      cat("ERROR: Time interval number must be between 1 and 6\n")
      cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n")
      return()
    }
    test_interval_idx <- time_interval_num
    cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n")
  }
  cat("========================================\n\n")
  time_prefix <- time_prefixes[test_interval_idx]
  # Check each of the 3 domains
  for (domain_idx in 1:3) {
    domain_name <- domain_names[domain_idx]
    # Get the appropriate item indices
    if (domain_idx == 1) {
      item_indices <- pref_indices
      domain_label <- "Preferences"
    } else if (domain_idx == 2) {
      item_indices <- pers_indices
      domain_label <- "Personality"
    } else {
      item_indices <- val_indices
      domain_label <- "Values"
    }
    cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label))
    # Get source column names
    source_cols_domain <- paste0(time_prefix, "_", items[item_indices])
    target_col <- paste0(time_prefix, "_", domain_name, "_MEAN")
    # Get values
    values <- numeric(5)
    cat("Source values:\n")
    for (i in 1:5) {
      col <- source_cols_domain[i]
      # Missing columns are treated as NA so the check still proceeds
      val <- if (col %in% names(df)) df[random_row, col] else NA
      values[i] <- val
      cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val))))
    }
    # Calculate expected mean
    # NAs dropped first to mirror rowMeans(..., na.rm = TRUE) in the main loop
    valid_values <- values[!is.na(values)]
    if (length(valid_values) > 0) {
      expected_mean <- mean(valid_values)
      actual_value <- df[random_row, target_col]
      cat(sprintf("\nCalculation:\n"))
      cat(sprintf(" Sum: %s = %.5f\n",
        paste(sprintf("%.5f", valid_values), collapse = " + "),
        sum(valid_values)))
      cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean))
      cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value))
      # Tolerance comparison avoids floating-point equality false alarms
      cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
    } else {
      cat(" No valid values to calculate mean.\n")
    }
    cat("\n")
  }
  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
# Run QA check on random row and random time interval
cat("\n\n")
qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below)
# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n")
cat("For random row AND random time interval, run:\n")
cat(" qa_check_random_row()\n")
cat("\nFor specific row (e.g., row 118) with random interval:\n")
cat(" qa_check_random_row(118)\n")
cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n")
cat(" qa_check_random_row(time_interval_num = 3)\n")
cat("\nFor specific row AND specific interval:\n")
cat(" qa_check_random_row(118, 3)\n")
cat("\n")
cat("Time Interval Numbers:\n")
cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n")
cat(" 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n")
cat("\n")
# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# BUG FIX: this write.csv was left live even though the messages printed
# below state the write is commented out; it is now commented out to match
# the stated review-first workflow (and the parallel item-difference script).
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment the write.csv line above to save changes.\n")
cat("\nProcessing complete! 18 domain mean columns calculated (not yet saved to file).\n")

View File

@ -0,0 +1,95 @@
# Script 08: Create 5_10 DGEN Variables
# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings
# for both Past and Future directions
# VARIABLES CREATED: 6 total (3 domains × 2 time directions)
library(tidyverse)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data
data <- read.csv("eohi2.csv")
# BUG FIX: collapse was " x" (no trailing space), producing e.g. "178 x120"
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$ResponseId))))

# Verify source columns exist before computing anything
source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
                 "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
                 "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
                 "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")
missing_vars <- source_vars[!source_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", ")))
}
print("All source DGEN variables found!")

# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year)
# Formula: |DGEN_5 - DGEN_10|
# NOTE: Using X prefix because R adds it to column names starting with numbers
# PAST direction
data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref)
data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers)
data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val)
# FUTURE direction
data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref)
data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers)
data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val)

# Verify variables were created
target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
                 "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")
# BUG FIX: print() does not interpret "\n" — it printed a literal backslash-n.
# Headers containing newlines now use cat() so they render correctly.
cat("\n=== VARIABLES CREATED ===\n")
print(target_vars)

# Check for missing values
for (var in target_vars) {
  n_missing <- sum(is.na(data[[var]]))
  pct_missing <- round(100 * n_missing / nrow(data), 2)
  print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing))
}

# Quality check: Display sample rows
cat("\n=== QUALITY CHECK: Sample Calculations ===\n")
sample_rows <- sample(1:nrow(data), min(5, nrow(data)))
for (i in sample_rows) {
  cat(sprintf("\nRow %d:\n", i))
  print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f",
    data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i]))
  print(sprintf(" → X5_10DGEN_past_pref = %.2f (expected: %.2f)",
    data$X5_10DGEN_past_pref[i],
    abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i])))
  print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f",
    data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i]))
  print(sprintf(" → X5_10DGEN_fut_pers = %.2f (expected: %.2f)",
    data$X5_10DGEN_fut_pers[i],
    abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i])))
}

# Descriptive statistics (n, mean, sd, min, max per created variable)
cat("\n=== DESCRIPTIVE STATISTICS ===\n")
desc_stats <- data %>%
  summarise(across(all_of(target_vars),
    list(n = ~sum(!is.na(.)),
         mean = ~round(mean(., na.rm = TRUE), 5),
         sd = ~round(sd(., na.rm = TRUE), 5),
         min = ~round(min(., na.rm = TRUE), 5),
         max = ~round(max(., na.rm = TRUE), 5)),
    .names = "{.col}_{.fn}"))
print(t(desc_stats))

# Save to CSV
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\n=== PROCESSING COMPLETE ===\n")
print("Data saved to eohi2.csv")
print(paste("Total columns now:", ncol(data)))

View File

@ -0,0 +1,223 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# REFACTOR: the original spelled out 11 rowMeans calls and 11 QA checks with
# hand-copied 15/30-column vectors. All column sets are now derived from one
# item list, so the sets cannot drift out of sync. Created columns, values,
# and console output are unchanged.

# The 15 item suffixes shared by every time-interval prefix
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# Helper: the 15 full column names for one time-interval prefix
cols_for <- function(prefix) paste0(prefix, "_", items)

# Mean-variable definitions: target column -> source columns.
# Sets 1-6 average one 15-item interval; sets 7-11 average two intervals (30 items).
mean_sets <- list(
  NPast_5_mean      = cols_for("NPast_5"),
  NPast_10_mean     = cols_for("NPast_10"),
  NFut_5_mean       = cols_for("NFut_5"),
  NFut_10_mean      = cols_for("NFut_10"),
  X5.10past_mean    = cols_for("X5.10past"),
  X5.10fut_mean     = cols_for("X5.10fut"),
  NPast_global_mean = c(cols_for("NPast_5"), cols_for("NPast_10")),
  NFut_global_mean  = c(cols_for("NFut_5"), cols_for("NFut_10")),
  X5.10_global_mean = c(cols_for("X5.10past"), cols_for("X5.10fut")),
  N5_global_mean    = c(cols_for("NPast_5"), cols_for("NFut_5")),
  N10_global_mean   = c(cols_for("NPast_10"), cols_for("NFut_10"))
)

# Compute each mean column (row-wise mean, NAs ignored)
for (target in names(mean_sets)) {
  data[[target]] <- rowMeans(data[, mean_sets[[target]]], na.rm = TRUE)
}

# Save the data
write.csv(data, "eohi2.csv", row.names = FALSE)

# ===== QA CODE: Check first 5 rows =====
# Recompute every mean independently and compare to the stored value
# (all.equal tolerates floating-point rounding).
cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n")
for (i in 1:min(5, nrow(data))) {
  cat("--- Row", i, "---\n")
  for (target in names(mean_sets)) {
    calc <- mean(as.numeric(data[i, mean_sets[[target]]]), na.rm = TRUE)
    # The last set ends each row's report with a blank line, as before
    eol <- if (target == "N10_global_mean") "\n\n" else "\n"
    cat(paste0(target, ": Calculated ="), calc, "| Stored =", data[[target]][i],
        "| Match:", isTRUE(all.equal(calc, data[[target]][i])), eol)
  }
}
cat("=== QA CHECK COMPLETE ===\n")

View File

@ -0,0 +1,115 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# REFACTOR: the 7 rowMeans calls and the 7-way copy-pasted QA loop are driven
# from a single named list, so each column set is defined exactly once.
# Created columns, values, and console output are unchanged.
mean_sets <- list(
  # Set 1: mean of the three past 5-10 difference domains
  DGEN_past_5.10_mean = c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val"),
  # Set 2: mean of the three future 5-10 difference domains
  DGEN_fut_5.10_mean = c("X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val"),
  # Set 3: all nine past DGEN variables
  DGENpast_global_mean = c(
    "DGEN_past_5_Pref", "DGEN_past_10_Pref", "X5_10DGEN_past_pref",
    "DGEN_past_5_Pers", "DGEN_past_10_Pers", "X5_10DGEN_past_pers",
    "DGEN_past_5_Val", "DGEN_past_10_Val", "X5_10DGEN_past_val"
  ),
  # Set 4: all nine future DGEN variables
  DGENfut_global_mean = c(
    "DGEN_fut_5_Pref", "DGEN_fut_10_Pref", "X5_10DGEN_fut_pref",
    "DGEN_fut_5_Pers", "DGEN_fut_10_Pers", "X5_10DGEN_fut_pers",
    "DGEN_fut_5_Val", "DGEN_fut_10_Val", "X5_10DGEN_fut_val"
  ),
  # Set 5: the six 5-year DGEN variables (past + future)
  DGEN_5_global_mean = c(
    "DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
    "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val"
  ),
  # Set 6: the six 10-year DGEN variables (past + future)
  DGEN_10_global_mean = c(
    "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
    "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"
  ),
  # Set 7: the six 5-10 difference variables (past + future)
  DGEN_5.10_global_mean = c(
    "X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
    "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val"
  )
)

# Compute each mean column (row-wise mean, NAs ignored)
for (target in names(mean_sets)) {
  data[[target]] <- rowMeans(data[, mean_sets[[target]]], na.rm = TRUE)
}

# Save the data
write.csv(data, "eohi2.csv", row.names = FALSE)

# ===== QA CODE: Check first 5 rows =====
# Recompute every mean independently and compare to the stored value
# (all.equal tolerates floating-point rounding).
cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n")
for (i in 1:min(5, nrow(data))) {
  cat("--- Row", i, "---\n")
  for (target in names(mean_sets)) {
    calc <- mean(as.numeric(data[i, mean_sets[[target]]]), na.rm = TRUE)
    # The last set ends each row's report with a blank line, as before
    eol <- if (target == "DGEN_5.10_global_mean") "\n\n" else "\n"
    cat(paste0(target, ": Calculated ="), calc, "| Stored =", data[[target]][i],
        "| Match:", isTRUE(all.equal(calc, data[[target]][i])), eol)
  }
}
cat("=== QA CHECK COMPLETE ===\n")

View File

@ -0,0 +1,235 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Create EHI difference variables (past rating minus future rating) for every
# item, in three time frames: 5-year, 10-year, and the 5-to-10-year change.
# Item names are "<domain>_<variable>", grouped by domain in the same order the
# original columns were laid out: preferences, personality, values.
ehi_items <- c(
  paste0("pref_", c("read", "music", "TV", "nap", "travel")),
  paste0("pers_", c("extravert", "critical", "dependable", "anxious", "complex")),
  paste0("val_", c("obey", "trad", "opinion", "performance", "justice"))
)

# === 5-YEAR PAST-FUTURE PAIRS ===
# ehi5_<item> = NPast_5_<item> - NFut_5_<item>
for (item in ehi_items) {
  data[[paste0("ehi5_", item)]] <-
    data[[paste0("NPast_5_", item)]] - data[[paste0("NFut_5_", item)]]
}

# === 10-YEAR PAST-FUTURE PAIRS ===
# ehi10_<item> = NPast_10_<item> - NFut_10_<item>
for (item in ehi_items) {
  data[[paste0("ehi10_", item)]] <-
    data[[paste0("NPast_10_", item)]] - data[[paste0("NFut_10_", item)]]
}

# === 5-10 YEAR CHANGE VARIABLES ===
# ehi5.10_<item> = X5.10past_<item> - X5.10fut_<item>
for (item in ehi_items) {
  data[[paste0("ehi5.10_", item)]] <-
    data[[paste0("X5.10past_", item)]] - data[[paste0("X5.10fut_", item)]]
}
# QA: Verify calculations - FIRST 5 ROWS with detailed output
# Spot-check of the newly created ehi5* / ehi10* / ehi5.10* difference columns:
# for each of the first 5 rows, the past-minus-future difference is recomputed
# inline and compared against the stored column; "✓"/"✗" marks agreement within
# a 1e-10 floating-point tolerance. Only a few representative items are printed
# here — the exhaustive check over all rows and all variables follows below.
cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")
cat("--- 5-YEAR VARIABLES ---\n")
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
# Representative items: two preferences and one personality variable.
cat(sprintf(" pref_read: %g - %g = %g | ehi5_pref_read = %g %s\n",
data$NPast_5_pref_read[i], data$NFut_5_pref_read[i],
data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i],
data$ehi5_pref_read[i],
ifelse(abs((data$NPast_5_pref_read[i] - data$NFut_5_pref_read[i]) - data$ehi5_pref_read[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pref_music: %g - %g = %g | ehi5_pref_music = %g %s\n",
data$NPast_5_pref_music[i], data$NFut_5_pref_music[i],
data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i],
data$ehi5_pref_music[i],
ifelse(abs((data$NPast_5_pref_music[i] - data$NFut_5_pref_music[i]) - data$ehi5_pref_music[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pers_extravert: %g - %g = %g | ehi5_pers_extravert = %g %s\n",
data$NPast_5_pers_extravert[i], data$NFut_5_pers_extravert[i],
data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i],
data$ehi5_pers_extravert[i],
ifelse(abs((data$NPast_5_pers_extravert[i] - data$NFut_5_pers_extravert[i]) - data$ehi5_pers_extravert[i]) < 1e-10, "✓", "✗")))
}
cat("\n--- 10-YEAR VARIABLES ---\n")
# Same spot-check for the 10-year difference columns.
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
cat(sprintf(" pref_read: %g - %g = %g | ehi10_pref_read = %g %s\n",
data$NPast_10_pref_read[i], data$NFut_10_pref_read[i],
data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i],
data$ehi10_pref_read[i],
ifelse(abs((data$NPast_10_pref_read[i] - data$NFut_10_pref_read[i]) - data$ehi10_pref_read[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pref_music: %g - %g = %g | ehi10_pref_music = %g %s\n",
data$NPast_10_pref_music[i], data$NFut_10_pref_music[i],
data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i],
data$ehi10_pref_music[i],
ifelse(abs((data$NPast_10_pref_music[i] - data$NFut_10_pref_music[i]) - data$ehi10_pref_music[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pers_extravert: %g - %g = %g | ehi10_pers_extravert = %g %s\n",
data$NPast_10_pers_extravert[i], data$NFut_10_pers_extravert[i],
data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i],
data$ehi10_pers_extravert[i],
ifelse(abs((data$NPast_10_pers_extravert[i] - data$NFut_10_pers_extravert[i]) - data$ehi10_pers_extravert[i]) < 1e-10, "✓", "✗")))
}
cat("\n--- 5-10 YEAR CHANGE VARIABLES ---\n")
# Same spot-check for the 5-to-10-year change difference columns.
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
cat(sprintf(" pref_read: %g - %g = %g | ehi5.10_pref_read = %g %s\n",
data$X5.10past_pref_read[i], data$X5.10fut_pref_read[i],
data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i],
data$ehi5.10_pref_read[i],
ifelse(abs((data$X5.10past_pref_read[i] - data$X5.10fut_pref_read[i]) - data$ehi5.10_pref_read[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pref_music: %g - %g = %g | ehi5.10_pref_music = %g %s\n",
data$X5.10past_pref_music[i], data$X5.10fut_pref_music[i],
data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i],
data$ehi5.10_pref_music[i],
ifelse(abs((data$X5.10past_pref_music[i] - data$X5.10fut_pref_music[i]) - data$ehi5.10_pref_music[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" pers_extravert: %g - %g = %g | ehi5.10_pers_extravert = %g %s\n",
data$X5.10past_pers_extravert[i], data$X5.10fut_pers_extravert[i],
data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i],
data$ehi5.10_pers_extravert[i],
ifelse(abs((data$X5.10past_pers_extravert[i] - data$X5.10fut_pers_extravert[i]) - data$ehi5.10_pers_extravert[i]) < 1e-10, "✓", "✗")))
}
# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

# Item names ("<domain>_<variable>") shared by every time frame, in the same
# order the difference variables were created.
item_names <- c(
  paste0("pref_", c("read", "music", "TV", "nap", "travel")),
  paste0("pers_", c("extravert", "critical", "dependable", "anxious", "complex")),
  paste0("val_", c("obey", "trad", "opinion", "performance", "justice"))
)

# Build the (past, future, difference) column triples for each time frame,
# ordered 5-year, then 10-year, then 5-10 change — matching creation order.
qa_pairs <- c(
  lapply(item_names, function(it) {
    list(npast = paste0("NPast_5_", it), nfut = paste0("NFut_5_", it),
         target = paste0("ehi5_", it))
  }),
  lapply(item_names, function(it) {
    list(npast = paste0("NPast_10_", it), nfut = paste0("NFut_10_", it),
         target = paste0("ehi10_", it))
  }),
  lapply(item_names, function(it) {
    list(npast = paste0("X5.10past_", it), nfut = paste0("X5.10fut_", it),
         target = paste0("ehi5.10_", it))
  })
)

all_checks_passed <- TRUE
for (p in qa_pairs) {
  # Recompute the expected difference and compare with the stored column,
  # tolerating floating-point rounding error up to 1e-10.
  want <- data[[p$npast]] - data[[p$nfut]]
  got <- data[[p$target]]
  bad_rows <- which(abs(want - got) > 1e-10)
  if (length(bad_rows) == 0) {
    cat(sprintf("PASS: %s (n = %d)\n", p$target, nrow(data)))
    next
  }
  cat(sprintf("FAIL: %s\n", p$target))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(bad_rows),
              paste(head(bad_rows, 10), collapse = ", ")))
  # Show first discrepancy details
  r <- bad_rows[1]
  cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
              r,
              p$npast, data[[p$npast]][r],
              p$nfut, data[[p$nfut]][r],
              want[r],
              p$target, got[r]))
  all_checks_passed <- FALSE
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (overwrites the input file in place)
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")

View File

@ -0,0 +1,118 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Create DGEN EHI difference scores: past composite minus future composite,
# per domain (Preferences, Personality, Values).
# === 5-YEAR DGEN PAST-FUTURE PAIRS ===
for (domain in c("Pref", "Pers", "Val")) {
  data[[paste0("ehiDGEN_5_", domain)]] <-
    data[[paste0("DGEN_past_5_", domain)]] - data[[paste0("DGEN_fut_5_", domain)]]
}
# === 10-YEAR DGEN PAST-FUTURE PAIRS ===
for (domain in c("Pref", "Pers", "Val")) {
  data[[paste0("ehiDGEN_10_", domain)]] <-
    data[[paste0("DGEN_past_10_", domain)]] - data[[paste0("DGEN_fut_10_", domain)]]
}
# QA: Verify calculations - FIRST 5 ROWS with detailed output
# Spot-check of the ehiDGEN_* difference columns: for each of the first 5 rows,
# past minus future is recomputed inline and compared with the stored column;
# "✓"/"✗" marks agreement within a 1e-10 floating-point tolerance.
cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")
cat("--- 5-YEAR DGEN VARIABLES ---\n")
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_5_Pref = %g %s\n",
data$DGEN_past_5_Pref[i], data$DGEN_fut_5_Pref[i],
data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i],
data$ehiDGEN_5_Pref[i],
ifelse(abs((data$DGEN_past_5_Pref[i] - data$DGEN_fut_5_Pref[i]) - data$ehiDGEN_5_Pref[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_5_Pers = %g %s\n",
data$DGEN_past_5_Pers[i], data$DGEN_fut_5_Pers[i],
data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i],
data$ehiDGEN_5_Pers[i],
ifelse(abs((data$DGEN_past_5_Pers[i] - data$DGEN_fut_5_Pers[i]) - data$ehiDGEN_5_Pers[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" Val: %g - %g = %g | ehiDGEN_5_Val = %g %s\n",
data$DGEN_past_5_Val[i], data$DGEN_fut_5_Val[i],
data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i],
data$ehiDGEN_5_Val[i],
ifelse(abs((data$DGEN_past_5_Val[i] - data$DGEN_fut_5_Val[i]) - data$ehiDGEN_5_Val[i]) < 1e-10, "✓", "✗")))
}
cat("\n--- 10-YEAR DGEN VARIABLES ---\n")
# Same spot-check for the 10-year DGEN difference columns.
for (i in 1:5) {
cat(sprintf("\nRow %d:\n", i))
cat(sprintf(" Pref: %g - %g = %g | ehiDGEN_10_Pref = %g %s\n",
data$DGEN_past_10_Pref[i], data$DGEN_fut_10_Pref[i],
data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i],
data$ehiDGEN_10_Pref[i],
ifelse(abs((data$DGEN_past_10_Pref[i] - data$DGEN_fut_10_Pref[i]) - data$ehiDGEN_10_Pref[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" Pers: %g - %g = %g | ehiDGEN_10_Pers = %g %s\n",
data$DGEN_past_10_Pers[i], data$DGEN_fut_10_Pers[i],
data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i],
data$ehiDGEN_10_Pers[i],
ifelse(abs((data$DGEN_past_10_Pers[i] - data$DGEN_fut_10_Pers[i]) - data$ehiDGEN_10_Pers[i]) < 1e-10, "✓", "✗")))
cat(sprintf(" Val: %g - %g = %g | ehiDGEN_10_Val = %g %s\n",
data$DGEN_past_10_Val[i], data$DGEN_fut_10_Val[i],
data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i],
data$ehiDGEN_10_Val[i],
ifelse(abs((data$DGEN_past_10_Val[i] - data$DGEN_fut_10_Val[i]) - data$ehiDGEN_10_Val[i]) < 1e-10, "✓", "✗")))
}
# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

# Build the (past, future, difference) column triples for both time frames,
# in creation order: 5-year Pref/Pers/Val, then 10-year Pref/Pers/Val.
qa_pairs <- unlist(lapply(c("5", "10"), function(iv) {
  lapply(c("Pref", "Pers", "Val"), function(d) {
    list(npast = paste0("DGEN_past_", iv, "_", d),
         nfut = paste0("DGEN_fut_", iv, "_", d),
         target = paste0("ehiDGEN_", iv, "_", d))
  })
}), recursive = FALSE)

all_checks_passed <- TRUE
for (p in qa_pairs) {
  # Recompute the expected difference and compare with the stored column,
  # tolerating floating-point rounding error up to 1e-10.
  want <- data[[p$npast]] - data[[p$nfut]]
  got <- data[[p$target]]
  bad_rows <- which(abs(want - got) > 1e-10)
  if (length(bad_rows) == 0) {
    cat(sprintf("PASS: %s (n = %d)\n", p$target, nrow(data)))
    next
  }
  cat(sprintf("FAIL: %s\n", p$target))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(bad_rows),
              paste(head(bad_rows, 10), collapse = ", ")))
  # Show first discrepancy details
  r <- bad_rows[1]
  cat(sprintf(" Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
              r,
              p$npast, data[[p$npast]][r],
              p$nfut, data[[p$nfut]][r],
              want[r],
              p$target, got[r]))
  all_checks_passed <- FALSE
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (overwrites the input file in place)
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")

View File

@ -0,0 +1,161 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Calculate domain-specific mean scores for EHI variables: each *_MEAN column
# is the row-wise average (NA-tolerant) of its five item-level difference
# scores. Columns are created in the same order as before: for each interval
# (5-year, 10-year, 5-10-year change), preferences, then personality, then
# values.
domain_items <- list(
  pref = c("read", "music", "TV", "nap", "travel"),
  pers = c("extravert", "critical", "dependable", "anxious", "complex"),
  val  = c("obey", "trad", "opinion", "performance", "justice")
)
for (interval in c("5", "10", "5.10")) {
  for (domain in names(domain_items)) {
    component_cols <- paste0("ehi", interval, "_", domain, "_", domain_items[[domain]])
    data[[paste0("ehi", interval, "_", domain, "_MEAN")]] <-
      rowMeans(data[, component_cols], na.rm = TRUE)
  }
}
# QA: Verify mean calculations
# Spot-check of four of the nine *_MEAN columns: for each of the first 5 rows,
# the mean of the component values is recomputed with mean(..., na.rm = TRUE)
# and compared with the stored column; "✓"/"✗" marks agreement within a 1e-10
# floating-point tolerance. The exhaustive all-rows check follows below.
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI domain-specific mean calculations\n\n")
cat("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5_pref_read[i], data$ehi5_pref_music[i],
data$ehi5_pref_TV[i], data$ehi5_pref_nap[i],
data$ehi5_pref_travel[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5_pref_MEAN[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3], vals[4], vals[5],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5_pers_extravert[i], data$ehi5_pers_critical[i],
data$ehi5_pers_dependable[i], data$ehi5_pers_anxious[i],
data$ehi5_pers_complex[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5_pers_MEAN[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3], vals[4], vals[5],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi10_pref_read[i], data$ehi10_pref_music[i],
data$ehi10_pref_TV[i], data$ehi10_pref_nap[i],
data$ehi10_pref_travel[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi10_pref_MEAN[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3], vals[4], vals[5],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5.10_pref_read[i], data$ehi5.10_pref_music[i],
data$ehi5.10_pref_TV[i], data$ehi5.10_pref_nap[i],
data$ehi5.10_pref_travel[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5.10_pref_MEAN[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3], vals[4], vals[5],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Display labels used in the PASS/FAIL messages, keyed by interval code and
# domain code, plus the item lists that make up each domain.
interval_labels <- c("5" = "5-Year", "10" = "10-Year", "5.10" = "5-10 Year Change")
domain_labels <- c(pref = "Preferences", pers = "Personality", val = "Values")
domain_items <- list(
  pref = c("read", "music", "TV", "nap", "travel"),
  pers = c("extravert", "critical", "dependable", "anxious", "complex"),
  val  = c("obey", "trad", "opinion", "performance", "justice")
)

# Rebuild the (component columns, target column, display name) triples in the
# same order the *_MEAN columns were created.
qa_checks <- list()
for (iv in names(interval_labels)) {
  for (d in names(domain_labels)) {
    qa_checks[[length(qa_checks) + 1]] <- list(
      vars = paste0("ehi", iv, "_", d, "_", domain_items[[d]]),
      target = paste0("ehi", iv, "_", d, "_MEAN"),
      name = paste(interval_labels[[iv]], domain_labels[[d]])
    )
  }
}

all_checks_passed <- TRUE
for (chk in qa_checks) {
  # Recompute the row-wise mean and compare with the stored column,
  # tolerating floating-point rounding error up to 1e-10.
  recomputed <- rowMeans(data[, chk$vars], na.rm = TRUE)
  stored <- data[[chk$target]]
  mismatches <- which(abs(recomputed - stored) > 1e-10)
  if (length(mismatches) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                chk$name, length(chk$vars), nrow(data)))
    next
  }
  cat(sprintf("FAIL: %s mean (n_vars = %d)\n", chk$name, length(chk$vars)))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(mismatches),
              paste(head(mismatches, 10), collapse = ", ")))
  all_checks_passed <- FALSE
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (overwrites the input file in place)
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")

View File

@ -0,0 +1,140 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Calculate global mean scores for EHI variables: each target column is the
# row-wise average (NA-tolerant) of its three component columns. The list is
# ordered exactly as the columns were originally created.
global_specs <- list(
  ehiDGEN_5_mean = c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
  ehiDGEN_10_mean = c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
  ehi5_global_mean = c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
  ehi10_global_mean = c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
  ehi5.10_global_mean = c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN")
)
for (target in names(global_specs)) {
  data[[target]] <- rowMeans(data[, global_specs[[target]]], na.rm = TRUE)
}
# QA: Verify mean calculations
# Spot-check of the five global-mean columns: for each of the first 5 rows, the
# mean of the three component values is recomputed with mean(..., na.rm = TRUE)
# and compared with the stored column; "✓"/"✗" marks agreement within a 1e-10
# floating-point tolerance. The exhaustive all-rows check follows below.
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI global mean calculations\n\n")
cat("--- FIRST 5 ROWS: DGEN 5-YEAR GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehiDGEN_5_Pref[i], data$ehiDGEN_5_Pers[i],
data$ehiDGEN_5_Val[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehiDGEN_5_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: DGEN 10-YEAR GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehiDGEN_10_Pref[i], data$ehiDGEN_10_Pers[i],
data$ehiDGEN_10_Val[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehiDGEN_10_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 5-YEAR GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5_pref_MEAN[i], data$ehi5_pers_MEAN[i],
data$ehi5_val_MEAN[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5_global_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 10-YEAR GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi10_pref_MEAN[i], data$ehi10_pers_MEAN[i],
data$ehi10_val_MEAN[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi10_global_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
cat("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE GLOBAL MEAN ---\n")
for (i in 1:5) {
vals <- c(data$ehi5.10_pref_MEAN[i], data$ehi5.10_pers_MEAN[i],
data$ehi5.10_val_MEAN[i])
calc_mean <- mean(vals, na.rm = TRUE)
actual_mean <- data$ehi5.10_global_mean[i]
match <- abs(calc_mean - actual_mean) < 1e-10
cat(sprintf("Row %d: [%.5f, %.5f, %.5f] → Calculated: %.5f | Actual: %.5f %s\n",
i, vals[1], vals[2], vals[3],
calc_mean, actual_mean, ifelse(match, "✓", "✗")))
}
# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Each entry: the component columns, the stored global-mean column they feed,
# and the display name used in the PASS/FAIL messages.
qa_checks <- list(
  # DGEN global means
  list(vars = c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
       target = "ehiDGEN_5_mean", name = "DGEN 5-Year Global"),
  list(vars = c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
       target = "ehiDGEN_10_mean", name = "DGEN 10-Year Global"),
  # Domain-specific global means
  list(vars = c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
       target = "ehi5_global_mean", name = "5-Year Global"),
  list(vars = c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
       target = "ehi10_global_mean", name = "10-Year Global"),
  list(vars = c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"),
       target = "ehi5.10_global_mean", name = "5-10 Year Change Global")
)

all_checks_passed <- TRUE
for (chk in qa_checks) {
  # Recompute the row-wise mean and compare with the stored column,
  # tolerating floating-point rounding error up to 1e-10.
  recomputed <- rowMeans(data[, chk$vars], na.rm = TRUE)
  stored <- data[[chk$target]]
  mismatches <- which(abs(recomputed - stored) > 1e-10)
  if (length(mismatches) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                chk$name, length(chk$vars), nrow(data)))
    next
  }
  cat(sprintf("FAIL: %s mean (n_vars = %d)\n", chk$name, length(chk$vars)))
  cat(sprintf(" Found %d discrepancies in rows: %s\n",
              length(mismatches),
              paste(head(mismatches, 10), collapse = ", ")))
  all_checks_passed <- FALSE
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset (overwrites the input file in place)
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")

View File

@ -0,0 +1,38 @@
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
data <- read.csv("eohi2.csv")

# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))

# Also show the unique values and their frequencies.
# BUG FIX: the original used print("\nUnique values and frequencies:"), which
# displays the "\n" escape literally (with a [1] prefix and quotes) instead of
# emitting a blank line; cat() renders the newline as intended.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))

# Recode demo_edu into 3 ordinal levels.
# Any demo_edu label not listed below (or NA) remains NA in edu3; the
# cross-tab printed at the end makes such unmapped labels visible.
data$edu3 <- NA
# HS_TS: High School and Trade School
data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS"
# C_Ug: College and University - Undergraduate
data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug"
# grad_prof: University - Graduate, University - PhD, and Professional Degree
data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof"

# Convert to ordered factor: HS_TS < C_Ug < grad_prof
data$edu3 <- factor(data$edu3,
                    levels = c("HS_TS", "C_Ug", "grad_prof"),
                    ordered = TRUE)

# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))
# Verify the recoding: cross-tabulate original labels against the new levels
print(table(data$demo_edu, data$edu3, useNA = "ifany"))

# Save the updated dataset with the new edu3 variable (overwrites input file)
write.csv(data, "eohi2.csv", row.names = FALSE)

View File

@ -0,0 +1,100 @@
# Standardize four EHI composite scores (z-scores), average them into
# stdEHI_mean, spot-check the arithmetic on 10 random rows, and save
# the updated dataset back to eohi2.csv.
options(scipen = 999)
library(dplyr)  # loaded for interactive follow-up; not used below
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
df <- read.csv("eohi2.csv")
# Display means and standard deviations of non-standardized variables for manual checking
print(round(mean(df$ehiDGEN_5_mean, na.rm = TRUE), 5))
print(round(sd(df$ehiDGEN_5_mean, na.rm = TRUE), 5))
print(round(mean(df$ehiDGEN_10_mean, na.rm = TRUE), 5))
print(round(sd(df$ehiDGEN_10_mean, na.rm = TRUE), 5))
print(round(mean(df$ehi5_global_mean, na.rm = TRUE), 5))
print(round(sd(df$ehi5_global_mean, na.rm = TRUE), 5))
print(round(mean(df$ehi10_global_mean, na.rm = TRUE), 5))
print(round(sd(df$ehi10_global_mean, na.rm = TRUE), 5))
# Calculate means and standard deviations for standardization
# (kept as named scalars so the row-level spot checks below can reuse them)
mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE)
sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE)
mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE)
sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE)
mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE)
sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE)
mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE)
sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE)
# Create standardized variables: z = (x - mean) / sd
df$stdDGEN_5 <- (df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5
df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10
df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5
df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10
# Check that variables have been standardized (mean ~ 0, sd ~ 1)
print(round(mean(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(mean(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(mean(df$stdDS_5, na.rm = TRUE), 5))
print(round(sd(df$stdDS_5, na.rm = TRUE), 5))
print(round(mean(df$stdDS_10, na.rm = TRUE), 5))
print(round(sd(df$stdDS_10, na.rm = TRUE), 5))
# Calculate mean of the four standardized variables per row
df$stdEHI_mean <- rowMeans(df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")], na.rm = TRUE)
#### check random 10 rows
# Check 10 random rows to verify calculations
set.seed(123) # For reproducible random selection
random_rows <- sample(nrow(df), 10)
cat("Checking 10 random rows:\n")
cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n")
cat("----|----------------|-----------|-------------|-----------------|------------|------------\n")
for(i in random_rows) {
  orig_5 <- df$ehiDGEN_5_mean[i]
  std_5 <- df$stdDGEN_5[i]
  calc_5 <- (orig_5 - mean_DGEN_5) / sd_DGEN_5
  orig_10 <- df$ehiDGEN_10_mean[i]
  std_10 <- df$stdDGEN_10[i]
  calc_10 <- (orig_10 - mean_DGEN_10) / sd_DGEN_10
  cat(sprintf("%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n",
              i, orig_5, std_5, calc_5, orig_10, std_10, calc_10))
}
cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n")
cat("----|------------------|---------|-------------|-------------------|----------|------------\n")
for(i in random_rows) {
  orig_5 <- df$ehi5_global_mean[i]
  std_5 <- df$stdDS_5[i]
  calc_5 <- (orig_5 - mean_DS_5) / sd_DS_5
  orig_10 <- df$ehi10_global_mean[i]
  std_10 <- df$stdDS_10[i]
  calc_10 <- (orig_10 - mean_DS_10) / sd_DS_10
  cat(sprintf("%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n",
              i, orig_5, std_5, calc_5, orig_10, std_10, calc_10))
}
# Show the final stdEHI_mean for these rows against an independent
# recomputation. BUG FIX: the previous version assigned manual_mean a
# sum of stray pasted-in numeric literals and discarded the mean(...)
# call on the next line, so every row compared against the same wrong
# constant; manual_mean must be the per-row mean itself.
cat("\nRow | stdEHI_mean | Manual calc\n")
cat("----|-------------|------------\n")
for(i in random_rows) {
  manual_mean <- mean(c(df$stdDGEN_5[i], df$stdDGEN_10[i], df$stdDS_5[i], df$stdDS_10[i]), na.rm = TRUE)
  cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean))
}
# Write to CSV
write.csv(df, "eohi2.csv", row.names = FALSE)

View File

@ -0,0 +1,24 @@
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
"ehiDGEN_5_Pref",1,489,0.06339,1.9954,0,0.03308,1.4826,-9,9,18,0.27576,4.00366,0.09024
"ehiDGEN_5_Pers",2,489,0.09816,1.85725,0,0.1145,1.4826,-9,7,16,-0.18711,3.23028,0.08399
"ehiDGEN_5_Val",3,489,0.02454,2.16042,0,0.06361,1.4826,-9,10,19,0.1765,4.44483,0.0977
"ehiDGEN_10_Pref",4,489,0.20245,2.29618,0,0.18575,1.4826,-10,9,19,0.01558,3.90242,0.10384
"ehiDGEN_10_Pers",5,489,0.29448,2.34328,0,0.24936,1.4826,-9,10,19,0.06997,2.70639,0.10597
"ehiDGEN_10_Val",6,489,0.40491,2.32319,0,0.32061,1.4826,-8,10,18,0.37025,2.73426,0.10506
"ehi5_pref_MEAN",7,489,0.07607,0.64562,0,0.05954,0.29652,-4.4,4,8.4,0.13291,10.10075,0.0292
"ehi5_pers_MEAN",8,489,0.07454,0.74495,0,0.05267,0.59304,-2.4,3.6,6,0.58141,3.07505,0.03369
"ehi5_val_MEAN",9,489,0.05194,0.58562,0,0.02748,0.29652,-2.6,3,5.6,0.62482,5.46128,0.02648
"ehi10_pref_MEAN",10,489,0.10348,0.69093,0,0.09059,0.59304,-3.6,3.6,7.2,0.02685,4.20962,0.03124
"ehi10_pers_MEAN",11,489,0.12168,0.75889,0,0.09389,0.59304,-3.6,3.8,7.4,0.14345,3.87988,0.03432
"ehi10_val_MEAN",12,489,0.14397,0.71372,0,0.08193,0.29652,-2.4,4,6.4,1.25911,5.60017,0.03228
"ehi5.10_pref_MEAN",13,489,0.13538,0.70635,0,0.12214,0.29652,-5.2,2.6,7.8,-0.87659,8.98815,0.03194
"ehi5.10_pers_MEAN",14,489,0.18773,0.77513,0,0.16489,0.59304,-3.6,3.4,7,0.18903,2.50653,0.03505
"ehi5.10_val_MEAN",15,489,0.12229,0.73027,0,0.08193,0.29652,-2.8,3.6,6.4,0.68131,4.79817,0.03302
"ehiDGEN_5_mean",16,489,0.06203,1.45735,0,0.06361,0.9884,-7.66667,5.66667,13.33333,-0.33245,3.67417,0.0659
"ehiDGEN_10_mean",17,489,0.30061,1.89245,0,0.28329,0.9884,-8,9,17,0.01127,3.38489,0.08558
"ehi5_global_mean",18,489,0.06752,0.46201,0,0.0486,0.29652,-1.93333,3.33333,5.26667,0.93595,7.08224,0.02089
"ehi10_global_mean",19,489,0.12304,0.52522,0,0.08494,0.29652,-2.46667,2.33333,4.8,0.55734,3.94213,0.02375
"ehi5.10_global_mean",20,489,0.14847,0.50536,0.06667,0.12231,0.29652,-1.93333,2.8,4.73333,0.67593,4.05776,0.02285
"aot_total",21,489,0.20475,0.50227,0.125,0.18861,0.37065,-2,1.75,3.75,0.06455,1.92235,0.02271
"crt_correct",22,489,0.26858,0.35082,0,0.21204,0,0,1,1,0.97116,-0.45636,0.01586
"crt_int",23,489,0.67212,0.3564,0.66667,0.71416,0.4942,0,1,1,-0.65482,-0.92392,0.01612
1 vars n mean sd median trimmed mad min max range skew kurtosis se
1 vars n mean sd median trimmed mad min max range skew kurtosis se
2 ehiDGEN_5_Pref 1 489 0.06339 1.9954 0 0.03308 1.4826 -9 9 18 0.27576 4.00366 0.09024
3 ehiDGEN_5_Pers 2 489 0.09816 1.85725 0 0.1145 1.4826 -9 7 16 -0.18711 3.23028 0.08399
4 ehiDGEN_5_Val 3 489 0.02454 2.16042 0 0.06361 1.4826 -9 10 19 0.1765 4.44483 0.0977
5 ehiDGEN_10_Pref 4 489 0.20245 2.29618 0 0.18575 1.4826 -10 9 19 0.01558 3.90242 0.10384
6 ehiDGEN_10_Pers 5 489 0.29448 2.34328 0 0.24936 1.4826 -9 10 19 0.06997 2.70639 0.10597
7 ehiDGEN_10_Val 6 489 0.40491 2.32319 0 0.32061 1.4826 -8 10 18 0.37025 2.73426 0.10506
8 ehi5_pref_MEAN 7 489 0.07607 0.64562 0 0.05954 0.29652 -4.4 4 8.4 0.13291 10.10075 0.0292
9 ehi5_pers_MEAN 8 489 0.07454 0.74495 0 0.05267 0.59304 -2.4 3.6 6 0.58141 3.07505 0.03369
10 ehi5_val_MEAN 9 489 0.05194 0.58562 0 0.02748 0.29652 -2.6 3 5.6 0.62482 5.46128 0.02648
11 ehi10_pref_MEAN 10 489 0.10348 0.69093 0 0.09059 0.59304 -3.6 3.6 7.2 0.02685 4.20962 0.03124
12 ehi10_pers_MEAN 11 489 0.12168 0.75889 0 0.09389 0.59304 -3.6 3.8 7.4 0.14345 3.87988 0.03432
13 ehi10_val_MEAN 12 489 0.14397 0.71372 0 0.08193 0.29652 -2.4 4 6.4 1.25911 5.60017 0.03228
14 ehi5.10_pref_MEAN 13 489 0.13538 0.70635 0 0.12214 0.29652 -5.2 2.6 7.8 -0.87659 8.98815 0.03194
15 ehi5.10_pers_MEAN 14 489 0.18773 0.77513 0 0.16489 0.59304 -3.6 3.4 7 0.18903 2.50653 0.03505
16 ehi5.10_val_MEAN 15 489 0.12229 0.73027 0 0.08193 0.29652 -2.8 3.6 6.4 0.68131 4.79817 0.03302
17 ehiDGEN_5_mean 16 489 0.06203 1.45735 0 0.06361 0.9884 -7.66667 5.66667 13.33333 -0.33245 3.67417 0.0659
18 ehiDGEN_10_mean 17 489 0.30061 1.89245 0 0.28329 0.9884 -8 9 17 0.01127 3.38489 0.08558
19 ehi5_global_mean 18 489 0.06752 0.46201 0 0.0486 0.29652 -1.93333 3.33333 5.26667 0.93595 7.08224 0.02089
20 ehi10_global_mean 19 489 0.12304 0.52522 0 0.08494 0.29652 -2.46667 2.33333 4.8 0.55734 3.94213 0.02375
21 ehi5.10_global_mean 20 489 0.14847 0.50536 0.06667 0.12231 0.29652 -1.93333 2.8 4.73333 0.67593 4.05776 0.02285
22 aot_total 21 489 0.20475 0.50227 0.125 0.18861 0.37065 -2 1.75 3.75 0.06455 1.92235 0.02271
23 crt_correct 22 489 0.26858 0.35082 0 0.21204 0 0 1 1 0.97116 -0.45636 0.01586
24 crt_int 23 489 0.67212 0.3564 0.66667 0.71416 0.4942 0 1 1 -0.65482 -0.92392 0.01612

View File

@ -0,0 +1,15 @@
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
"DGEN_past_5_mean",1,489,3.70279,2.64159,3.33333,3.57422,2.9652,0,10,10,0.33519,-0.92598,0.11946
"DGEN_past_10_mean",2,489,4.12747,2.69248,4,4.07209,3.4594,0,10,10,0.15303,-1.06997,0.12176
"DGEN_fut_5_mean",3,489,3.64076,2.72097,3,3.47498,2.9652,0,10,10,0.45879,-0.91272,0.12305
"DGEN_fut_10_mean",4,489,3.82686,2.67605,3.33333,3.6743,2.9652,0,10,10,0.41341,-0.85159,0.12101
"DGEN_past_5.10_mean",5,489,1.10907,1.14357,0.66667,0.93384,0.9884,0,6.66667,6.66667,1.38388,2.20547,0.05171
"DGEN_fut_5.10_mean",6,489,1.03272,1.16269,0.66667,0.83885,0.9884,0,7.33333,7.33333,1.82686,4.61137,0.05258
"DGENpast_global_mean",7,489,2.97978,1.86338,3.11111,2.96409,2.30627,0,6.66667,6.66667,0.05122,-1.1105,0.08427
"DGENfut_global_mean",8,489,2.83345,1.86526,2.66667,2.76788,2.30627,0,6.66667,6.66667,0.25867,-1.05797,0.08435
"DGEN_5_global_mean",9,489,3.67178,2.58067,3.16667,3.53859,2.9652,0,10,10,0.38068,-0.90478,0.1167
"DGEN_10_global_mean",10,489,3.97716,2.51197,3.83333,3.89992,2.9652,0,10,10,0.23861,-0.92036,0.1136
"DGEN_5.10_global_mean",11,489,1.07089,0.95646,0.83333,0.95335,0.9884,0,5,5,1.13335,1.28416,0.04325
"aot_total",12,489,0.20475,0.50227,0.125,0.18861,0.37065,-2,1.75,3.75,0.06455,1.92235,0.02271
"crt_correct",13,489,0.26858,0.35082,0,0.21204,0,0,1,1,0.97116,-0.45636,0.01586
"crt_int",14,489,0.67212,0.3564,0.66667,0.71416,0.4942,0,1,1,-0.65482,-0.92392,0.01612
1 vars n mean sd median trimmed mad min max range skew kurtosis se
1 vars n mean sd median trimmed mad min max range skew kurtosis se
2 DGEN_past_5_mean 1 489 3.70279 2.64159 3.33333 3.57422 2.9652 0 10 10 0.33519 -0.92598 0.11946
3 DGEN_past_10_mean 2 489 4.12747 2.69248 4 4.07209 3.4594 0 10 10 0.15303 -1.06997 0.12176
4 DGEN_fut_5_mean 3 489 3.64076 2.72097 3 3.47498 2.9652 0 10 10 0.45879 -0.91272 0.12305
5 DGEN_fut_10_mean 4 489 3.82686 2.67605 3.33333 3.6743 2.9652 0 10 10 0.41341 -0.85159 0.12101
6 DGEN_past_5.10_mean 5 489 1.10907 1.14357 0.66667 0.93384 0.9884 0 6.66667 6.66667 1.38388 2.20547 0.05171
7 DGEN_fut_5.10_mean 6 489 1.03272 1.16269 0.66667 0.83885 0.9884 0 7.33333 7.33333 1.82686 4.61137 0.05258
8 DGENpast_global_mean 7 489 2.97978 1.86338 3.11111 2.96409 2.30627 0 6.66667 6.66667 0.05122 -1.1105 0.08427
9 DGENfut_global_mean 8 489 2.83345 1.86526 2.66667 2.76788 2.30627 0 6.66667 6.66667 0.25867 -1.05797 0.08435
10 DGEN_5_global_mean 9 489 3.67178 2.58067 3.16667 3.53859 2.9652 0 10 10 0.38068 -0.90478 0.1167
11 DGEN_10_global_mean 10 489 3.97716 2.51197 3.83333 3.89992 2.9652 0 10 10 0.23861 -0.92036 0.1136
12 DGEN_5.10_global_mean 11 489 1.07089 0.95646 0.83333 0.95335 0.9884 0 5 5 1.13335 1.28416 0.04325
13 aot_total 12 489 0.20475 0.50227 0.125 0.18861 0.37065 -2 1.75 3.75 0.06455 1.92235 0.02271
14 crt_correct 13 489 0.26858 0.35082 0 0.21204 0 0 1 1 0.97116 -0.45636 0.01586
15 crt_int 14 489 0.67212 0.3564 0.66667 0.71416 0.4942 0 1 1 -0.65482 -0.92392 0.01612

Some files were not shown because too many files have changed in this diff Show More