eohi3 update
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
.history
|
||||||
55
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"name": "Launch R-Workspace",
|
||||||
|
"request": "launch",
|
||||||
|
"debugMode": "workspace",
|
||||||
|
"workingDirectory": "${workspaceFolder}",
|
||||||
|
"splitOverwrittenOutput": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"name": "Debug R-File",
|
||||||
|
"request": "launch",
|
||||||
|
"debugMode": "file",
|
||||||
|
"workingDirectory": "${workspaceFolder}",
|
||||||
|
"file": "${file}",
|
||||||
|
"splitOverwrittenOutput": true,
|
||||||
|
"stopOnEntry": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"name": "Debug R-Function",
|
||||||
|
"request": "launch",
|
||||||
|
"debugMode": "function",
|
||||||
|
"workingDirectory": "${workspaceFolder}",
|
||||||
|
"file": "${file}",
|
||||||
|
"mainFunction": "main",
|
||||||
|
"allowGlobalDebugging": false,
|
||||||
|
"splitOverwrittenOutput": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"name": "Debug R-Package",
|
||||||
|
"request": "launch",
|
||||||
|
"debugMode": "workspace",
|
||||||
|
"workingDirectory": "${workspaceFolder}",
|
||||||
|
"includePackageScopes": true,
|
||||||
|
"loadPackages": [
|
||||||
|
"."
|
||||||
|
],
|
||||||
|
"splitOverwrittenOutput": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "R-Debugger",
|
||||||
|
"request": "attach",
|
||||||
|
"name": "Attach to R process",
|
||||||
|
"splitOverwrittenOutput": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
BIN
anova tables.pptx
Normal file
BIN
correlation tables for presentation.docx
Normal file
118
eohi1/BS_means.vb
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
Option Explicit
|
||||||
|
|
||||||
|
Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||||
|
Dim lastCol As Long
|
||||||
|
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||||
|
Dim m As Variant
|
||||||
|
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||||
|
If IsError(m) Then
|
||||||
|
GetColIndex = 0
|
||||||
|
Else
|
||||||
|
GetColIndex = CLng(m)
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
|
||||||
|
Dim tmp() As Long
|
||||||
|
ReDim tmp(0 To UBound(headers))
|
||||||
|
Dim i As Long, c As Long
|
||||||
|
c = 0
|
||||||
|
For i = LBound(headers) To UBound(headers)
|
||||||
|
Dim colIdx As Long
|
||||||
|
colIdx = GetColIndex(CStr(headers(i)), ws)
|
||||||
|
If colIdx > 0 Then
|
||||||
|
tmp(c) = colIdx
|
||||||
|
c = c + 1
|
||||||
|
End If
|
||||||
|
Next i
|
||||||
|
If c = 0 Then
|
||||||
|
BuildPresentColArray = Array()
|
||||||
|
Else
|
||||||
|
Dim outArr() As Long
|
||||||
|
ReDim outArr(0 To c - 1)
|
||||||
|
For i = 0 To c - 1
|
||||||
|
outArr(i) = tmp(i)
|
||||||
|
Next i
|
||||||
|
BuildPresentColArray = outArr
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
|
||||||
|
Dim i As Long
|
||||||
|
Dim sumVals As Double
|
||||||
|
Dim countVals As Long
|
||||||
|
sumVals = 0
|
||||||
|
countVals = 0
|
||||||
|
If IsArray(colIndexes) Then
|
||||||
|
For i = LBound(colIndexes) To UBound(colIndexes)
|
||||||
|
Dim v As Variant
|
||||||
|
v = ws.Cells(rowIndex, CLng(colIndexes(i))).Value
|
||||||
|
If Not IsError(v) Then
|
||||||
|
If IsNumeric(v) Then
|
||||||
|
sumVals = sumVals + CDbl(v)
|
||||||
|
countVals = countVals + 1
|
||||||
|
End If
|
||||||
|
End If
|
||||||
|
Next i
|
||||||
|
End If
|
||||||
|
If countVals = 0 Then
|
||||||
|
MeanOfRow = CVErr(xlErrNA)
|
||||||
|
Else
|
||||||
|
MeanOfRow = sumVals / countVals
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
|
||||||
|
Dim c As Long
|
||||||
|
c = GetColIndex(headerName, ws)
|
||||||
|
If c = 0 Then
|
||||||
|
Dim lastCol As Long
|
||||||
|
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||||
|
c = lastCol + 1
|
||||||
|
ws.Cells(1, c).Value = headerName
|
||||||
|
End If
|
||||||
|
EnsureOutputColumn = c
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Sub BS_Means()
|
||||||
|
Dim ws As Worksheet
|
||||||
|
Set ws = ThisWorkbook.Sheets(1)
|
||||||
|
|
||||||
|
Dim all28 As Variant
|
||||||
|
all28 = Array( _
|
||||||
|
"lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
|
||||||
|
"spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
|
||||||
|
)
|
||||||
|
|
||||||
|
Dim easy14 As Variant
|
||||||
|
easy14 = Array( _
|
||||||
|
"lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _
|
||||||
|
)
|
||||||
|
|
||||||
|
Dim hard14 As Variant
|
||||||
|
hard14 = Array( _
|
||||||
|
"spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
|
||||||
|
)
|
||||||
|
|
||||||
|
Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
|
||||||
|
colsAll = BuildPresentColArray(all28, ws)
|
||||||
|
colsEasy = BuildPresentColArray(easy14, ws)
|
||||||
|
colsHard = BuildPresentColArray(hard14, ws)
|
||||||
|
|
||||||
|
Dim colBS28 As Long, colBSEasy As Long, colBSHard As Long
|
||||||
|
colBS28 = EnsureOutputColumn(ws, "bs_28")
|
||||||
|
colBSEasy = EnsureOutputColumn(ws, "bs_easy")
|
||||||
|
colBSHard = EnsureOutputColumn(ws, "bs_hard")
|
||||||
|
|
||||||
|
Dim lastRow As Long
|
||||||
|
lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||||
|
|
||||||
|
Dim r As Long
|
||||||
|
For r = 2 To lastRow
|
||||||
|
ws.Cells(r, colBS28).Value = MeanOfRow(ws, r, colsAll)
|
||||||
|
ws.Cells(r, colBSEasy).Value = MeanOfRow(ws, r, colsEasy)
|
||||||
|
ws.Cells(r, colBSHard).Value = MeanOfRow(ws, r, colsHard)
|
||||||
|
Next r
|
||||||
|
End Sub
|
||||||
|
|
||||||
|
|
||||||
25
eohi1/DataP 01 - domain mean totals .r
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp1_data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Calculate NPast_mean_total as average of NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
|
||||||
|
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Calculate NFut_mean_total as average of NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life
|
||||||
|
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Save the updated data
|
||||||
|
write.csv(exp1_data, "exp1.csv", row.names = FALSE)
|
||||||
|
|
||||||
|
# Display summary of the calculated totals
|
||||||
|
cat("NPast_mean_total summary:\n")
|
||||||
|
summary(exp1_data$NPast_mean_total)
|
||||||
|
cat("\nNFut_mean_total summary:\n")
|
||||||
|
summary(exp1_data$NFut_mean_total)
|
||||||
|
|
||||||
|
# Show first few rows to verify calculations
|
||||||
|
cat("\nFirst 5 rows of calculated totals:\n")
|
||||||
|
print(exp1_data[1:5, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
|
||||||
|
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")])
|
||||||
17
eohi1/E1 - correlation_matrix.csv
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
,eohiDGEN_mean,ehi_global_mean,sex_dummy,demo_age_1,edu_num,AOT_total,CRT_correct,CRT_int,bs_28,bs_easy,bs_hard,cal_selfActual
|
||||||
|
ehi_global_mean,0.31***,,,,,,,,,,,
|
||||||
|
sex_dummy,0.0086,0.044,,,,,,,,,,
|
||||||
|
demo_age_1,-0.12***,-0.19***,0.0094,,,,,,,,,
|
||||||
|
edu_num,-0.0081,0.009,-0.062*,-0.023,,,,,,,,
|
||||||
|
AOT_total,0.11***,0.086***,-0.042,0.11,0.035,,,,,,,
|
||||||
|
CRT_correct,0.068*,0.051,-0.15***,-0.049,0.13***,0.26***,,,,,,
|
||||||
|
CRT_int,-0.058,-0.057,0.16***,0.071*,-0.12***,-0.21***,-0.87***,,,,,
|
||||||
|
bs_28,-0.01,-0.006803247,-0.0022,-0.071*,-0.065*,-0.27***,-0.25***,0.23***,,,,
|
||||||
|
bs_easy,-0.0088,0.014,-0.037,-0.30***,-0.009,-0.31***,-0.094***,0.047,0.61***,,,
|
||||||
|
bs_hard,-0.015,-0.024,0.022,0.095**,-0.085**,-0.15***,-0.24***,0.25***,0.87***,0.18***,,
|
||||||
|
cal_selfActual,0.015,-0.051,-0.15***,0.18***,0.12***,-0.041,0.047,-0.053,0.31***,0.05315529,0.35***,
|
||||||
|
cal_global,0.019,-0.0047,-0.078*,0.031,0.021,-0.16***,-0.15***,0.13***,0.76***,0.37***,0.72***,0.55***
|
||||||
|
,,,,,,,,,,,,
|
||||||
|
*p<0.05,,,,,,,,,,,,
|
||||||
|
**p<0.01,,,,,,,,,,,,
|
||||||
|
***p<0.001,,,,,,,,,,,,
|
||||||
|
18
eohi1/E1 - correlation_pvalues.csv
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
,eohiDGEN_mean,ehi_global_mean,sex_dummy,demo_age_1,edu_num,AOT_total,CRT_correct,CRT_int,bs_28,bs_easy,bs_hard,cal_selfActual
|
||||||
|
ehi_global_mean,3.36865E-25,,,,,,,,,,,
|
||||||
|
sex_dummy,0.778486413,0.154074007,,,,,,,,,,
|
||||||
|
demo_age_1,5.04502E-05,1.68576E-10,0.759436702,,,,,,,,,
|
||||||
|
edu_num,0.79326152,0.768960745,0.043945088,0.454222403,,,,,,,,
|
||||||
|
AOT_total,0.000229114,0.004867793,0.168503786,0.000314432,0.259313829,,,,,,,
|
||||||
|
CRT_correct,0.027291283,0.099344285,4.65922E-07,0.108342391,3.46509E-05,4.3812E-18,,,,,,
|
||||||
|
CRT_int,0.05842806,0.065472289,1.53701E-07,0.021280157,6.99182E-05,3.99893E-12,0,,,,,
|
||||||
|
bs_28,0.735913365,0.824825013,0.94333124,0.020232959,0.035132191,1.09048E-18,1.61283E-16,1.17159E-13,,,,
|
||||||
|
bs_easy,0.775943639,0.653590097,0.22951034,3.86076E-23,0.77008947,1.15661E-24,0.002210577,0.123278134,6.2215E-108,,,
|
||||||
|
bs_hard,0.622307404,0.443340859,0.469287124,0.001914319,0.005383608,7.67838E-07,9.71752E-16,2.00781E-16,0,2.79128E-09,,
|
||||||
|
cal_selfActual,0.623530057,0.098794041,8.23097E-07,2.62161E-09,0.000133768,0.183121705,0.125377421,0.088656149,2.63226E-25,0.083521419,5.04214E-32,
|
||||||
|
cal_global,0.543555525,0.879493859,0.011276459,0.318757547,0.487961367,3.0437E-07,8.00711E-07,1.20286E-05,1.4315E-202,3.1393E-35,1.2757E-171,4.92187E-86
|
||||||
|
,,,,,,,,,,,,
|
||||||
|
,,,,,,,,,,,,
|
||||||
|
,p<0.05,,,,,,,,,,,
|
||||||
|
,p<0.01,,,,,,,,,,,
|
||||||
|
,p<0.001,,,,,,,,,,,
|
||||||
|
65
eohi1/EHI reliability.html
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
<html><head><title>EHI Reliability Analysis</title></head><body><h1>EHI Reliability Analysis</h1><h2>Two-Item Reliability Summary</h2><p>Pearson r: 0.34049 (95% CI: [0.28622, 0.39258])</p><p>Spearman r: 0.31052 (95% CI: [0.25516, 0.36386])</p><p>Spearman–Brown / Cronbach's α: 0.50801 (95% CI: [0.44506, 0.56382])</p><h2>Cronbach's Alpha</h2><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> raw_alpha </th>
|
||||||
|
<th style="text-align:right;"> std.alpha </th>
|
||||||
|
<th style="text-align:right;"> G6(smc) </th>
|
||||||
|
<th style="text-align:right;"> average_r </th>
|
||||||
|
<th style="text-align:right;"> S/N </th>
|
||||||
|
<th style="text-align:right;"> ase </th>
|
||||||
|
<th style="text-align:right;"> mean </th>
|
||||||
|
<th style="text-align:right;"> sd </th>
|
||||||
|
<th style="text-align:right;"> median_r </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> </td>
|
||||||
|
<td style="text-align:right;"> 0.3078741 </td>
|
||||||
|
<td style="text-align:right;"> 0.5080098 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
<td style="text-align:right;"> 1.032561 </td>
|
||||||
|
<td style="text-align:right;"> 0.0216938 </td>
|
||||||
|
<td style="text-align:right;"> 0.2755268 </td>
|
||||||
|
<td style="text-align:right;"> 0.98504 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table><h2>Split-Half Reliability</h2><p>Maximum split half reliability: 0.50801</p><h2>Item-Level Statistics</h2><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> n </th>
|
||||||
|
<th style="text-align:right;"> raw.r </th>
|
||||||
|
<th style="text-align:right;"> std.r </th>
|
||||||
|
<th style="text-align:right;"> r.cor </th>
|
||||||
|
<th style="text-align:right;"> r.drop </th>
|
||||||
|
<th style="text-align:right;"> mean </th>
|
||||||
|
<th style="text-align:right;"> sd </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> eohiDGEN_mean </td>
|
||||||
|
<td style="text-align:right;"> 1063 </td>
|
||||||
|
<td style="text-align:right;"> 0.9706327 </td>
|
||||||
|
<td style="text-align:right;"> 0.8186853 </td>
|
||||||
|
<td style="text-align:right;"> 0.4777163 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
<td style="text-align:right;"> 0.4148824 </td>
|
||||||
|
<td style="text-align:right;"> 1.740598 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 1063 </td>
|
||||||
|
<td style="text-align:right;"> 0.5566839 </td>
|
||||||
|
<td style="text-align:right;"> 0.8186853 </td>
|
||||||
|
<td style="text-align:right;"> 0.4777163 </td>
|
||||||
|
<td style="text-align:right;"> 0.3404914 </td>
|
||||||
|
<td style="text-align:right;"> 0.1361712 </td>
|
||||||
|
<td style="text-align:right;"> 0.504053 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table></body></html>
|
||||||
BIN
eohi1/Rplots.pdf
Normal file
0
eohi1/accuracy + calibration - 28 items.vb
Normal file
BIN
eohi1/age_DGEN_assumptions.png
Normal file
|
After Width: | Height: | Size: 17 KiB |
BIN
eohi1/age_DGEN_plot.png
Normal file
|
After Width: | Height: | Size: 444 KiB |
BIN
eohi1/age_domain_assumptions.png
Normal file
|
After Width: | Height: | Size: 18 KiB |
BIN
eohi1/age_domain_plot.png
Normal file
|
After Width: | Height: | Size: 530 KiB |
164
eohi1/assumption_checks_before_cronbach.r
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
# Assumption Checks Before Cronbach's Alpha Analysis
|
||||||
|
# Run this BEFORE the main reliability analysis
|
||||||
|
|
||||||
|
library(psych)
|
||||||
|
library(corrplot)
|
||||||
|
library(ggplot2)
|
||||||
|
|
||||||
|
# Read the data
|
||||||
|
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Define scale variables
|
||||||
|
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
|
||||||
|
"NPastDiff_pref_nap", "NPastDiff_pref_travel")
|
||||||
|
|
||||||
|
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
|
||||||
|
"NPastDiff_pers_anxious", "NPastDiff_pers_complex")
|
||||||
|
|
||||||
|
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
|
||||||
|
"NPastDiff_val_performance", "NPastDiff_val_justice")
|
||||||
|
|
||||||
|
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
|
||||||
|
"NPastDiff_life_important", "NPastDiff_life_change")
|
||||||
|
|
||||||
|
# Function to check assumptions for a scale
|
||||||
|
check_assumptions <- function(data, var_names, scale_name) {
|
||||||
|
cat("\n", "="*60, "\n")
|
||||||
|
cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
|
||||||
|
cat("="*60, "\n")
|
||||||
|
|
||||||
|
# Get scale data
|
||||||
|
scale_data <- data[, var_names]
|
||||||
|
|
||||||
|
# 1. Sample size check
|
||||||
|
complete_cases <- sum(complete.cases(scale_data))
|
||||||
|
cat("1. SAMPLE SIZE CHECK:\n")
|
||||||
|
cat(" Total participants:", nrow(data), "\n")
|
||||||
|
cat(" Complete cases:", complete_cases, "\n")
|
||||||
|
cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
|
||||||
|
|
||||||
|
if(complete_cases < 30) {
|
||||||
|
cat(" WARNING: Sample size too small for reliable alpha estimates\n")
|
||||||
|
return(FALSE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# 2. Missing data check
|
||||||
|
cat("\n2. MISSING DATA CHECK:\n")
|
||||||
|
missing_counts <- colSums(is.na(scale_data))
|
||||||
|
missing_pct <- round(missing_counts / nrow(data) * 100, 2)
|
||||||
|
cat(" Missing data by item:\n")
|
||||||
|
for(i in 1:length(var_names)) {
|
||||||
|
cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
max_missing <- max(missing_pct)
|
||||||
|
cat(" Maximum missing:", max_missing, "%\n")
|
||||||
|
cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
|
||||||
|
|
||||||
|
# 3. Use only complete cases for remaining checks
|
||||||
|
complete_data <- scale_data[complete.cases(scale_data), ]
|
||||||
|
|
||||||
|
# 4. Normality check (Shapiro-Wilk test on first item as example)
|
||||||
|
cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
|
||||||
|
if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
|
||||||
|
shapiro_result <- shapiro.test(complete_data[, 1])
|
||||||
|
cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
|
||||||
|
cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
|
||||||
|
} else {
|
||||||
|
cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# 5. Inter-item correlations check
|
||||||
|
cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
|
||||||
|
cor_matrix <- cor(complete_data)
|
||||||
|
|
||||||
|
# Get off-diagonal correlations
|
||||||
|
cor_matrix[lower.tri(cor_matrix)] <- NA
|
||||||
|
diag(cor_matrix) <- NA
|
||||||
|
cors <- as.vector(cor_matrix)
|
||||||
|
cors <- cors[!is.na(cors)]
|
||||||
|
|
||||||
|
positive_cors <- sum(cors > 0)
|
||||||
|
strong_cors <- sum(cors > 0.30)
|
||||||
|
negative_cors <- sum(cors < 0)
|
||||||
|
|
||||||
|
cat(" Total correlations:", length(cors), "\n")
|
||||||
|
cat(" Positive correlations:", positive_cors, "\n")
|
||||||
|
cat(" Strong correlations (>0.30):", strong_cors, "\n")
|
||||||
|
cat(" Negative correlations:", negative_cors, "\n")
|
||||||
|
cat(" Mean correlation:", round(mean(cors), 4), "\n")
|
||||||
|
cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
|
||||||
|
|
||||||
|
if(negative_cors > 0) {
|
||||||
|
cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
|
||||||
|
}
|
||||||
|
if(strong_cors / length(cors) < 0.5) {
|
||||||
|
cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# 6. Item variance check
|
||||||
|
cat("\n5. ITEM VARIANCE CHECK:\n")
|
||||||
|
item_vars <- apply(complete_data, 2, var)
|
||||||
|
var_ratio <- max(item_vars) / min(item_vars)
|
||||||
|
cat(" Item variances:", round(item_vars, 4), "\n")
|
||||||
|
cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
|
||||||
|
cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
|
||||||
|
|
||||||
|
# 7. Outlier check
|
||||||
|
cat("\n6. OUTLIER CHECK:\n")
|
||||||
|
# Check for multivariate outliers using Mahalanobis distance
|
||||||
|
if(nrow(complete_data) > ncol(complete_data)) {
|
||||||
|
mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
|
||||||
|
outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
|
||||||
|
outliers <- sum(mahal_dist > outlier_threshold)
|
||||||
|
cat(" Multivariate outliers (p<0.001):", outliers, "\n")
|
||||||
|
cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
# 8. Summary recommendation
|
||||||
|
cat("\n7. OVERALL RECOMMENDATION:\n")
|
||||||
|
issues <- 0
|
||||||
|
if(complete_cases < 30) issues <- issues + 1
|
||||||
|
if(max_missing >= 20) issues <- issues + 1
|
||||||
|
if(negative_cors > 0) issues <- issues + 1
|
||||||
|
if(var_ratio >= 4) issues <- issues + 1
|
||||||
|
|
||||||
|
if(issues == 0) {
|
||||||
|
cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
|
||||||
|
} else if(issues <= 2) {
|
||||||
|
cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
|
||||||
|
} else {
|
||||||
|
cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
return(TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check assumptions for all past scales
|
||||||
|
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
|
||||||
|
cat("\nData: exp1.csv")
|
||||||
|
cat("\nTotal sample size:", nrow(data))
|
||||||
|
|
||||||
|
check_assumptions(data, past_pref_vars, "Past Preferences")
|
||||||
|
check_assumptions(data, past_pers_vars, "Past Personality")
|
||||||
|
check_assumptions(data, past_val_vars, "Past Values")
|
||||||
|
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
|
||||||
|
|
||||||
|
# Quick check of future scales (you can expand this)
|
||||||
|
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
|
||||||
|
"NFutDiff_pref_nap", "NFutDiff_pref_travel")
|
||||||
|
|
||||||
|
check_assumptions(data, fut_pref_vars, "Future Preferences")
|
||||||
|
|
||||||
|
cat("\n", "="*60, "\n")
|
||||||
|
cat("GENERAL GUIDELINES:\n")
|
||||||
|
cat("="*60, "\n")
|
||||||
|
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
|
||||||
|
cat("⚠️ If some assumptions are violated, interpret with caution\n")
|
||||||
|
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
|
||||||
|
cat(" - Omega coefficient (more robust to violations)\n")
|
||||||
|
cat(" - Split-half reliability\n")
|
||||||
|
cat(" - Test-retest reliability\n")
|
||||||
|
cat(" - Factor analysis to check dimensionality\n")
|
||||||
|
|
||||||
|
|
||||||
141
eohi1/brierVARS.vb
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||||
|
Dim lastCol As Long
|
||||||
|
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||||
|
On Error Resume Next
|
||||||
|
Dim m As Variant
|
||||||
|
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||||
|
On Error GoTo 0
|
||||||
|
If IsError(m) Then
|
||||||
|
GetCol = 0
|
||||||
|
Else
|
||||||
|
GetCol = CLng(m)
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function NormalizeTruth(ByVal v As Variant) As Variant
|
||||||
|
' Returns True/False or Null if cannot determine
|
||||||
|
If VarType(v) = vbBoolean Then
|
||||||
|
NormalizeTruth = v
|
||||||
|
Exit Function
|
||||||
|
End If
|
||||||
|
If IsError(v) Or IsEmpty(v) Then
|
||||||
|
NormalizeTruth = Null
|
||||||
|
Exit Function
|
||||||
|
End If
|
||||||
|
Dim s As String
|
||||||
|
s = Trim$(UCase$(CStr(v)))
|
||||||
|
Select Case s
|
||||||
|
Case "TRUE", "T", "1", "YES", "Y"
|
||||||
|
NormalizeTruth = True
|
||||||
|
Case "FALSE", "F", "0", "NO", "N"
|
||||||
|
NormalizeTruth = False
|
||||||
|
Case Else
|
||||||
|
NormalizeTruth = Null
|
||||||
|
End Select
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Private Function NormalizeProb01(ByVal v As Variant) As Double
|
||||||
|
' Converts confidence values to [0,1]
|
||||||
|
If IsError(v) Or IsEmpty(v) Then
|
||||||
|
NormalizeProb01 = -1
|
||||||
|
Exit Function
|
||||||
|
End If
|
||||||
|
Dim s As String
|
||||||
|
s = CStr(v)
|
||||||
|
If InStr(s, "%") > 0 Then
|
||||||
|
s = Replace$(s, "%", "")
|
||||||
|
If IsNumeric(s) Then
|
||||||
|
NormalizeProb01 = CDbl(s) / 100#
|
||||||
|
Exit Function
|
||||||
|
End If
|
||||||
|
End If
|
||||||
|
If IsNumeric(v) Then
|
||||||
|
Dim d As Double
|
||||||
|
d = CDbl(v)
|
||||||
|
If d > 1# Then
|
||||||
|
NormalizeProb01 = d / 100#
|
||||||
|
Else
|
||||||
|
NormalizeProb01 = d
|
||||||
|
End If
|
||||||
|
Else
|
||||||
|
NormalizeProb01 = -1
|
||||||
|
End If
|
||||||
|
End Function
|
||||||
|
|
||||||
|
Sub brierVARS()
|
||||||
|
Dim false_vars As Variant
|
||||||
|
Dim true_vars As Variant
|
||||||
|
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
|
||||||
|
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||||
|
Dim target_headers As Variant
|
||||||
|
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||||
|
Dim ws As Worksheet
|
||||||
|
Set ws = ThisWorkbook.Sheets(1)
|
||||||
|
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||||
|
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||||
|
Dim result As Variant
|
||||||
|
Dim r As Long, j As Long
|
||||||
|
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||||
|
For i = 0 To UBound(false_vars) Step 2
|
||||||
|
srcVar = false_vars(i)
|
||||||
|
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||||
|
colSource1 = GetCol(srcVar, ws)
|
||||||
|
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||||
|
If colSource1 > 0 And colSourceCON > 0 Then
|
||||||
|
For r = 2 To rowCount
|
||||||
|
checkVal = ws.Cells(r, colSource1).Value
|
||||||
|
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||||
|
colTarget = 0
|
||||||
|
For j = 0 To UBound(target_headers)
|
||||||
|
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||||
|
colTarget = GetCol(target_headers(j), ws)
|
||||||
|
Exit For
|
||||||
|
End If
|
||||||
|
Next j
|
||||||
|
If colTarget > 0 Then
|
||||||
|
Dim truth As Variant
|
||||||
|
truth = NormalizeTruth(checkVal)
|
||||||
|
If IsNull(truth) Or val < 0 Then
|
||||||
|
result = CVErr(xlErrNA)
|
||||||
|
ElseIf truth = True Then
|
||||||
|
result = (val - 0#) ^ 2
|
||||||
|
Else
|
||||||
|
result = ((1# - val) - 0#) ^ 2
|
||||||
|
End If
|
||||||
|
ws.Cells(r, colTarget).Value = result
|
||||||
|
End If
|
||||||
|
Next r
|
||||||
|
End If
|
||||||
|
Next i
|
||||||
|
For i = 0 To UBound(true_vars) Step 2
|
||||||
|
srcVar = true_vars(i)
|
||||||
|
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||||
|
colSource1 = GetCol(srcVar, ws)
|
||||||
|
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||||
|
If colSource1 > 0 And colSourceCON > 0 Then
|
||||||
|
For r = 2 To rowCount
|
||||||
|
checkVal = ws.Cells(r, colSource1).Value
|
||||||
|
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||||
|
colTarget = 0
|
||||||
|
For j = 0 To UBound(target_headers)
|
||||||
|
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||||
|
colTarget = GetCol(target_headers(j), ws)
|
||||||
|
Exit For
|
||||||
|
End If
|
||||||
|
Next j
|
||||||
|
If colTarget > 0 Then
|
||||||
|
Dim truth2 As Variant
|
||||||
|
truth2 = NormalizeTruth(checkVal)
|
||||||
|
If IsNull(truth2) Or val < 0 Then
|
||||||
|
result = CVErr(xlErrNA)
|
||||||
|
ElseIf truth2 = True Then
|
||||||
|
result = (val - 1#) ^ 2
|
||||||
|
Else
|
||||||
|
result = ((1# - val) - 1#) ^ 2
|
||||||
|
End If
|
||||||
|
ws.Cells(r, colTarget).Value = result
|
||||||
|
End If
|
||||||
|
Next r
|
||||||
|
End If
|
||||||
|
Next i
|
||||||
|
End Sub
|
||||||
57
eohi1/corr_bs_all.csv
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
group,var_x,var_y,n,r,p
|
||||||
|
BS_vs_Cal,bs_28,cal_global,1063,0.761895605,2.37E-202
|
||||||
|
BS_vs_Cal,bs_easy,cal_global,1063,0.367329651,2.67E-35
|
||||||
|
BS_vs_Cal,bs_hard,cal_global,1063,0.721638057,1.16E-171
|
||||||
|
BS_vs_Cal,bs_28,cal_selfActual,1063,0.312906942,1.41E-25
|
||||||
|
BS_vs_Cal,bs_easy,cal_selfActual,1063,0.054307733,0.07675131
|
||||||
|
BS_vs_Cal,bs_hard,cal_selfActual,1063,0.351630149,2.70E-32
|
||||||
|
BS_vs_EOHI,bs_28,ehi_global_mean,1063,-0.005868845,0.848429656
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_global_mean,1063,0.014251763,0.642549738
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_global_mean,1063,-0.022683413,0.460035927
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_global_mean,1063,-0.004912877,0.872888653
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_global_mean,1063,-0.049727552,0.105147189
|
||||||
|
BS_vs_EOHI,bs_28,ehi_life_mean,1063,-0.007684801,0.802384957
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_life_mean,1063,-0.024372981,0.427293321
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_life_mean,1063,-0.001049006,0.972748582
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_life_mean,1063,-0.002906394,0.924594545
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_life_mean,1063,0.028247567,0.357533744
|
||||||
|
BS_vs_EOHI,bs_28,ehi_pers_mean,1063,-0.00684928,0.823495332
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_pers_mean,1063,-0.009992311,0.744870091
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_pers_mean,1063,-0.013361509,0.663459912
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_pers_mean,1063,-0.009160631,0.765455012
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_pers_mean,1063,-0.082807345,0.006907147
|
||||||
|
BS_vs_EOHI,bs_28,ehi_pref_mean,1063,-0.009153426,0.765634093
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_pref_mean,1063,0.038767348,0.20660862
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_pref_mean,1063,-0.033018506,0.282127883
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_pref_mean,1063,0.009717642,0.751649131
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_pref_mean,1063,-0.044228709,0.149576975
|
||||||
|
BS_vs_EOHI,bs_28,ehi_val_mean,1063,0.011477853,0.708558862
|
||||||
|
BS_vs_EOHI,bs_easy,ehi_val_mean,1063,0.048673437,0.11273791
|
||||||
|
BS_vs_EOHI,bs_hard,ehi_val_mean,1063,-0.008316877,0.786509127
|
||||||
|
Cal_vs_EOHI,cal_global,ehi_val_mean,1063,-0.024447239,0.425886059
|
||||||
|
Cal_vs_EOHI,cal_selfActual,ehi_val_mean,1063,-0.099629152,0.001143742
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_life,1063,-0.011893558,0.69851102
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_life,1063,-0.087676604,0.004226705
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_life,1063,0.036722132,0.231590633
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_life,1063,0.004140068,0.892751642
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_life,1063,0.036877484,0.229620994
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_mean,1063,-0.008492209,0.782120675
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_mean,1063,-0.007458252,0.808095459
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_mean,1063,-0.013570736,0.658521255
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_mean,1063,0.019739833,0.520291178
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_mean,1063,0.016902147,0.582001734
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_pers,1063,-0.013831247,0.652392826
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_pers,1063,-0.013771822,0.653788727
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_pers,1063,-0.013837654,0.652242395
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_pers,1063,-0.010393397,0.735006441
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_pers,1063,-0.028354225,0.355720599
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_pref,1063,0.002305478,0.940152072
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_pref,1063,0.014268945,0.642148877
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_pref,1063,-0.006108935,0.842308668
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_pref,1063,0.024379777,0.427164421
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_pref,1063,0.008387657,0.784736723
|
||||||
|
BS_vs_EOHI,bs_28,eohiDGEN_val,1063,-0.043411125,0.157255062
|
||||||
|
BS_vs_EOHI,bs_easy,eohiDGEN_val,1063,-0.058087693,0.058325548
|
||||||
|
BS_vs_EOHI,bs_hard,eohiDGEN_val,1063,-0.032241181,0.293618158
|
||||||
|
Cal_vs_EOHI,cal_global,eohiDGEN_val,1063,-0.032038903,0.296658895
|
||||||
|
Cal_vs_EOHI,cal_selfActual,eohiDGEN_val,1063,0.035471252,0.247887232
|
||||||
|
103
eohi1/correlation matrix.r
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
df <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
data <- df %>%
|
||||||
|
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||||
|
filter(demo_sex != "Prefer not to say")
|
||||||
|
|
||||||
|
print(colSums(is.na(data)))
|
||||||
|
print(sapply(data, class))
|
||||||
|
|
||||||
|
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||||
|
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||||
|
|
||||||
|
# Verify the dummy coding
|
||||||
|
print(table(data$demo_sex, data$sex_dummy))
|
||||||
|
|
||||||
|
#descriptives
|
||||||
|
|
||||||
|
# Descriptives for age
|
||||||
|
print(summary(data$demo_age_1))
|
||||||
|
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||||
|
|
||||||
|
# Center demo_age_1 (subtract the mean)
|
||||||
|
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||||
|
|
||||||
|
# Verify the centering
|
||||||
|
print(summary(data$age_centered))
|
||||||
|
|
||||||
|
# Descriptives for sex (frequency table)
|
||||||
|
print(table(data$demo_sex))
|
||||||
|
print(prop.table(table(data$demo_sex)))
|
||||||
|
|
||||||
|
# Descriptives for sex dummy variable
|
||||||
|
print(table(data$sex_dummy))
|
||||||
|
|
||||||
|
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||||
|
# First ensure edu3 is a factor, then convert to numeric
|
||||||
|
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||||
|
data$edu_num <- as.numeric(data$edu3)
|
||||||
|
|
||||||
|
# Check the numeric conversion
|
||||||
|
print(table(data$edu_num, useNA = "ifany"))
|
||||||
|
|
||||||
|
# Verify the conversion
|
||||||
|
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||||
|
|
||||||
|
####correlation matrix ####
|
||||||
|
|
||||||
|
# Select numeric variables for correlation matrix
|
||||||
|
numeric_vars <- data %>%
|
||||||
|
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||||
|
|
||||||
|
# Create Spearman correlation matrix
|
||||||
|
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||||
|
|
||||||
|
# Print correlation matrix
|
||||||
|
print(round(cor_matrix, 3))
|
||||||
|
|
||||||
|
# Get significance tests for correlations using psych package
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Create correlation matrix with significance tests
|
||||||
|
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||||
|
|
||||||
|
# Print correlation matrix
|
||||||
|
print(round(cor_test$r, 3))
|
||||||
|
|
||||||
|
# Print p-values
|
||||||
|
print(round(cor_test$p, 3))
|
||||||
|
|
||||||
|
# Print all correlations with r and p values (for reporting)
|
||||||
|
for(i in 1:nrow(cor_test$r)) {
|
||||||
|
for(j in 1:ncol(cor_test$r)) {
|
||||||
|
if(i != j) { # Skip diagonal
|
||||||
|
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||||
|
": r =", round(cor_test$r[i, j], 3),
|
||||||
|
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Also print significant correlations summary
|
||||||
|
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||||
|
if(nrow(sig_cors) > 0) {
|
||||||
|
for(i in 1:nrow(sig_cors)) {
|
||||||
|
row_idx <- sig_cors[i, 1]
|
||||||
|
col_idx <- sig_cors[i, 2]
|
||||||
|
if(row_idx != col_idx) { # Skip diagonal
|
||||||
|
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||||
|
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||||
|
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Save correlation matrix and p-values to CSV files
|
||||||
|
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||||
|
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||||
31
eohi1/correlation_exp1.csv
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
Variable1,Variable2,Spearman_r,P_value
|
||||||
|
ehi_global_mean,AOT_total,0.084899905,0.005609163
|
||||||
|
ehi_life_mean,AOT_total,0.072949943,0.017368706
|
||||||
|
ehi_pref_mean,CRT_correct,0.068917794,0.024639893
|
||||||
|
ehi_pref_mean,CRT_int,-0.064728497,0.034848248
|
||||||
|
eohiDGEN_life,AOT_total,0.117946108,0.000116007
|
||||||
|
eohiDGEN_mean,AOT_total,0.111022973,0.000287049
|
||||||
|
eohiDGEN_mean,CRT_correct,0.066645932,0.029798224
|
||||||
|
eohiDGEN_pers,AOT_total,0.091596281,0.002797567
|
||||||
|
eohiDGEN_pref,AOT_total,0.068355715,0.025838342
|
||||||
|
eohiDGEN_val,AOT_total,0.103955004,0.000687468
|
||||||
|
eohiDGEN_val,CRT_correct,0.074789341,0.014729792
|
||||||
|
eohiDGEN_val,CRT_int,-0.06044435,0.048816455
|
||||||
|
eohiDGEN_pref,CRT_int,-0.058185337,0.057903079
|
||||||
|
ehi_global_mean,CRT_int,-0.057457963,0.061112
|
||||||
|
eohiDGEN_mean,CRT_int,-0.057307383,0.061794359
|
||||||
|
eohiDGEN_pref,CRT_correct,0.053610168,0.080621938
|
||||||
|
ehi_global_mean,CRT_correct,0.051329314,0.094394596
|
||||||
|
ehi_pers_mean,CRT_correct,0.051328061,0.094402649
|
||||||
|
ehi_pref_mean,AOT_total,0.05039887,0.100528033
|
||||||
|
ehi_pers_mean,CRT_int,-0.049058279,0.109918146
|
||||||
|
eohiDGEN_life,CRT_correct,0.039494214,0.198218491
|
||||||
|
ehi_val_mean,CRT_int,-0.037910532,0.216825816
|
||||||
|
ehi_pers_mean,AOT_total,0.031316388,0.307691267
|
||||||
|
eohiDGEN_pers,CRT_correct,0.025658263,0.403319679
|
||||||
|
eohiDGEN_pers,CRT_int,-0.017319242,0.572720797
|
||||||
|
ehi_life_mean,CRT_correct,0.016392301,0.593440889
|
||||||
|
ehi_val_mean,AOT_total,0.015193625,0.620731537
|
||||||
|
ehi_val_mean,CRT_correct,0.00599121,0.845308845
|
||||||
|
ehi_life_mean,CRT_int,-0.005811627,0.849889753
|
||||||
|
eohiDGEN_life,CRT_int,-0.001303724,0.966135022
|
||||||
|
BIN
eohi1/correlation_plot_scales_pearson.pdf
Normal file
BIN
eohi1/correlation_plot_scales_spearman.pdf
Normal file
81
eohi1/correlations - brier score x eohi and cal.r
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
# Load required libraries
|
||||||
|
library(Hmisc)
|
||||||
|
library(knitr)
|
||||||
|
library(dplyr)
|
||||||
|
library(corrr)
|
||||||
|
library(broom)
|
||||||
|
library(purrr)
|
||||||
|
library(tidyr)
|
||||||
|
library(tibble)
|
||||||
|
library(boot)
|
||||||
|
|
||||||
|
options(scipen = 999)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
# Load data
|
||||||
|
df1 <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
# Keep only required columns for the analysis
|
||||||
|
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||||
|
eohi_vars <- c(
|
||||||
|
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||||
|
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||||
|
)
|
||||||
|
cal_vars <- c("cal_selfActual","cal_global")
|
||||||
|
|
||||||
|
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||||
|
|
||||||
|
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||||
|
|
||||||
|
# Variables
|
||||||
|
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||||
|
eohi_vars <- c(
|
||||||
|
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||||
|
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||||
|
)
|
||||||
|
cal_vars <- c("cal_selfActual","cal_global")
|
||||||
|
|
||||||
|
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||||
|
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||||
|
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||||
|
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||||
|
xv <- df[[x]]; yv <- df[[y]]
|
||||||
|
ok <- is.finite(xv) & is.finite(yv)
|
||||||
|
if (sum(ok) < 3) {
|
||||||
|
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||||
|
}
|
||||||
|
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||||
|
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||||
|
})
|
||||||
|
dplyr::arrange(results, var_x, var_y)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute correlations (Spearman only)
|
||||||
|
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||||
|
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||||
|
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
|
||||||
|
|
||||||
|
# Wide r-only tables (optional)
|
||||||
|
to_wide <- function(d) {
|
||||||
|
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||||
|
}
|
||||||
|
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||||
|
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||||
|
wide_cal_eohi <- to_wide(corr_cal_eohi)
|
||||||
|
|
||||||
|
# Display
|
||||||
|
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||||
|
print(corr_bs_eohi)
|
||||||
|
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||||
|
print(corr_bs_cal)
|
||||||
|
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
|
||||||
|
print(corr_cal_eohi)
|
||||||
|
|
||||||
|
# Export a single CSV combining all sets
|
||||||
|
corr_bs_eohi$group <- "BS_vs_EOHI"
|
||||||
|
corr_bs_cal$group <- "BS_vs_Cal"
|
||||||
|
corr_cal_eohi$group <- "Cal_vs_EOHI"
|
||||||
|
|
||||||
|
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
|
||||||
|
dplyr::relocate(group, .before = var_x)
|
||||||
|
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||||
303
eohi1/correlations - eohi x calibration.r
Normal file
@ -0,0 +1,303 @@
|
|||||||
|
# Load required libraries
|
||||||
|
library(Hmisc)
|
||||||
|
library(knitr)
|
||||||
|
library(dplyr)
|
||||||
|
library(corrr)
|
||||||
|
library(broom)
|
||||||
|
library(purrr)
|
||||||
|
library(tidyr)
|
||||||
|
library(tibble)
|
||||||
|
library(boot)
|
||||||
|
|
||||||
|
options(scipen = 999)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
# Load data
|
||||||
|
df1 <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Remove columns with all NA values
|
||||||
|
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||||
|
|
||||||
|
# Select variables of interest
|
||||||
|
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
||||||
|
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
||||||
|
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
|
||||||
|
|
||||||
|
# Create dataset with selected variables
|
||||||
|
df <- df1[, c(eohi_vars, cal_vars)]
|
||||||
|
|
||||||
|
# Ensure all selected variables are numeric
|
||||||
|
df <- df %>%
|
||||||
|
mutate(across(everything(), as.numeric))
|
||||||
|
|
||||||
|
# Remove rows with any missing values for correlation analysis
|
||||||
|
df_complete <- df[complete.cases(df), ]
|
||||||
|
|
||||||
|
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
||||||
|
cat("Total sample size:", nrow(df), "\n")
|
||||||
|
|
||||||
|
str(df)
|
||||||
|
summary(df)
|
||||||
|
####==== DESCRIPTIVE STATISTICS ====
|
||||||
|
|
||||||
|
# Function to compute descriptive statistics
|
||||||
|
get_descriptives <- function(data, vars) {
|
||||||
|
desc_stats <- data %>%
|
||||||
|
select(all_of(vars)) %>%
|
||||||
|
summarise(across(everything(), list(
|
||||||
|
n = ~sum(!is.na(.)),
|
||||||
|
mean = ~mean(., na.rm = TRUE),
|
||||||
|
sd = ~sd(., na.rm = TRUE),
|
||||||
|
min = ~min(., na.rm = TRUE),
|
||||||
|
max = ~max(., na.rm = TRUE),
|
||||||
|
median = ~median(., na.rm = TRUE),
|
||||||
|
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
||||||
|
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
||||||
|
))) %>%
|
||||||
|
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
||||||
|
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
||||||
|
pivot_wider(names_from = stat, values_from = value) %>%
|
||||||
|
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
||||||
|
|
||||||
|
return(desc_stats)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Get descriptives for EOHI variables
|
||||||
|
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
||||||
|
cat("\n=== EOHI Variables Descriptives ===\n")
|
||||||
|
print(eohi_descriptives)
|
||||||
|
|
||||||
|
# Get descriptives for calibration variables
|
||||||
|
cal_descriptives <- get_descriptives(df, cal_vars)
|
||||||
|
cat("\n=== Calibration Variables Descriptives ===\n")
|
||||||
|
print(cal_descriptives)
|
||||||
|
|
||||||
|
# Check for bimodal or unusual distributions
|
||||||
|
hist(df$eohi_pref)
|
||||||
|
hist(df$cal_selfActual)
|
||||||
|
|
||||||
|
# Look for extreme values
|
||||||
|
# boxplot(df$eohi_pref)
|
||||||
|
# boxplot(df$cal_selfActual)
|
||||||
|
|
||||||
|
# Test normality for each variable - probably unnecessary
|
||||||
|
library(nortest)
|
||||||
|
|
||||||
|
# Test EOHI variables
|
||||||
|
for(var in eohi_vars) {
|
||||||
|
cat("\n", var, "normality test:\n")
|
||||||
|
print(shapiro.test(df_complete[[var]]))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Test calibration variables
|
||||||
|
for(var in cal_vars) {
|
||||||
|
cat("\n", var, "normality test:\n")
|
||||||
|
print(shapiro.test(df_complete[[var]]))
|
||||||
|
}
|
||||||
|
|
||||||
|
####==== PEARSON CORRELATIONS ====
|
||||||
|
|
||||||
|
# Compute correlation matrix with p-values
|
||||||
|
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
||||||
|
|
||||||
|
# Extract correlation coefficients
|
||||||
|
cor_pearson <- cor_results_pearson$r
|
||||||
|
|
||||||
|
# Extract p-values
|
||||||
|
p_matrix_pearson <- cor_results_pearson$P
|
||||||
|
|
||||||
|
# Function to add significance stars
|
||||||
|
corstars <- function(cor_mat, p_mat) {
|
||||||
|
stars <- ifelse(p_mat < 0.001, "***",
|
||||||
|
ifelse(p_mat < 0.01, "**",
|
||||||
|
ifelse(p_mat < 0.05, "*", "")))
|
||||||
|
|
||||||
|
# Combine correlation values with stars, rounded to 5 decimal places
|
||||||
|
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
||||||
|
nrow = nrow(cor_mat))
|
||||||
|
|
||||||
|
# Set row and column names
|
||||||
|
rownames(cor_with_stars) <- rownames(cor_mat)
|
||||||
|
colnames(cor_with_stars) <- colnames(cor_mat)
|
||||||
|
|
||||||
|
return(cor_with_stars)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Apply the function
|
||||||
|
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
||||||
|
|
||||||
|
cat("\n=== PEARSON CORRELATIONS ===\n")
|
||||||
|
print(cor_table_pearson, quote = FALSE)
|
||||||
|
|
||||||
|
# Extract specific correlations between EOHI and calibration variables
|
||||||
|
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
||||||
|
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
||||||
|
|
||||||
|
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
||||||
|
for(i in 1:nrow(eohi_cal_correlations)) {
|
||||||
|
for(j in 1:ncol(eohi_cal_correlations)) {
|
||||||
|
cor_val <- eohi_cal_correlations[i, j]
|
||||||
|
p_val <- eohi_cal_pvalues[i, j]
|
||||||
|
star <- ifelse(p_val < 0.001, "***",
|
||||||
|
ifelse(p_val < 0.01, "**",
|
||||||
|
ifelse(p_val < 0.05, "*", "")))
|
||||||
|
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
||||||
|
rownames(eohi_cal_correlations)[i],
|
||||||
|
colnames(eohi_cal_correlations)[j],
|
||||||
|
cor_val, star, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
####==== SPEARMAN CORRELATIONS ====
|
||||||
|
|
||||||
|
# Compute Spearman correlation matrix with p-values
|
||||||
|
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
||||||
|
|
||||||
|
# Extract correlation coefficients
|
||||||
|
cor_spearman <- cor_results_spearman$r
|
||||||
|
|
||||||
|
# Extract p-values
|
||||||
|
p_matrix_spearman <- cor_results_spearman$P
|
||||||
|
|
||||||
|
# Apply the function
|
||||||
|
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
||||||
|
|
||||||
|
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
||||||
|
print(cor_table_spearman, quote = FALSE)
|
||||||
|
|
||||||
|
# Extract specific correlations between EOHI and calibration variables
|
||||||
|
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
||||||
|
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
||||||
|
|
||||||
|
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
||||||
|
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
||||||
|
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
||||||
|
cor_val <- eohi_cal_correlations_spearman[i, j]
|
||||||
|
p_val <- eohi_cal_pvalues_spearman[i, j]
|
||||||
|
star <- ifelse(p_val < 0.001, "***",
|
||||||
|
ifelse(p_val < 0.01, "**",
|
||||||
|
ifelse(p_val < 0.05, "*", "")))
|
||||||
|
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
||||||
|
rownames(eohi_cal_correlations_spearman)[i],
|
||||||
|
colnames(eohi_cal_correlations_spearman)[j],
|
||||||
|
cor_val, star, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
||||||
|
|
||||||
|
# Function to compute correlation with bootstrap CI
|
||||||
|
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
||||||
|
# Remove missing values
|
||||||
|
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
||||||
|
|
||||||
|
if(nrow(complete_data) < 3) {
|
||||||
|
return(data.frame(
|
||||||
|
correlation = NA,
|
||||||
|
ci_lower = NA,
|
||||||
|
ci_upper = NA,
|
||||||
|
n = nrow(complete_data)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Bootstrap function
|
||||||
|
boot_fun <- function(data, indices) {
|
||||||
|
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Perform bootstrap
|
||||||
|
set.seed(123) # for reproducibility
|
||||||
|
boot_results <- boot(complete_data, boot_fun, R = R)
|
||||||
|
|
||||||
|
# Calculate confidence interval
|
||||||
|
ci <- boot.ci(boot_results, type = "perc")
|
||||||
|
|
||||||
|
return(data.frame(
|
||||||
|
correlation = boot_results$t0,
|
||||||
|
ci_lower = ci$perc[4],
|
||||||
|
ci_upper = ci$perc[5],
|
||||||
|
n = nrow(complete_data)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
||||||
|
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
||||||
|
bootstrap_results_pearson <- expand.grid(
|
||||||
|
eohi_var = eohi_vars,
|
||||||
|
cal_var = cal_vars,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
) %>%
|
||||||
|
pmap_dfr(function(eohi_var, cal_var) {
|
||||||
|
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
||||||
|
result$eohi_var <- eohi_var
|
||||||
|
result$cal_var <- cal_var
|
||||||
|
result$method <- "pearson"
|
||||||
|
return(result)
|
||||||
|
}) %>%
|
||||||
|
mutate(
|
||||||
|
correlation = round(correlation, 5),
|
||||||
|
ci_lower = round(ci_lower, 5),
|
||||||
|
ci_upper = round(ci_upper, 5)
|
||||||
|
)
|
||||||
|
|
||||||
|
print(bootstrap_results_pearson)
|
||||||
|
|
||||||
|
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
||||||
|
bootstrap_results_spearman <- expand.grid(
|
||||||
|
eohi_var = eohi_vars,
|
||||||
|
cal_var = cal_vars,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
) %>%
|
||||||
|
pmap_dfr(function(eohi_var, cal_var) {
|
||||||
|
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
||||||
|
result$eohi_var <- eohi_var
|
||||||
|
result$cal_var <- cal_var
|
||||||
|
result$method <- "spearman"
|
||||||
|
return(result)
|
||||||
|
}) %>%
|
||||||
|
mutate(
|
||||||
|
correlation = round(correlation, 5),
|
||||||
|
ci_lower = round(ci_lower, 5),
|
||||||
|
ci_upper = round(ci_upper, 5)
|
||||||
|
)
|
||||||
|
|
||||||
|
print(bootstrap_results_spearman)
|
||||||
|
|
||||||
|
####==== SUMMARY TABLE ====
|
||||||
|
|
||||||
|
# Create comprehensive summary table
|
||||||
|
summary_table <- bootstrap_results_pearson %>%
|
||||||
|
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
||||||
|
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
||||||
|
left_join(
|
||||||
|
bootstrap_results_spearman %>%
|
||||||
|
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
||||||
|
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
||||||
|
by = c("eohi_var", "cal_var")
|
||||||
|
) %>%
|
||||||
|
# Add p-values
|
||||||
|
left_join(
|
||||||
|
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
||||||
|
pmap_dfr(function(eohi_var, cal_var) {
|
||||||
|
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
||||||
|
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
||||||
|
data.frame(
|
||||||
|
eohi_var = eohi_var,
|
||||||
|
cal_var = cal_var,
|
||||||
|
pearson_p = pearson_p,
|
||||||
|
spearman_p = spearman_p
|
||||||
|
)
|
||||||
|
}),
|
||||||
|
by = c("eohi_var", "cal_var")
|
||||||
|
) %>%
|
||||||
|
mutate(
|
||||||
|
pearson_p = round(pearson_p, 5),
|
||||||
|
spearman_p = round(spearman_p, 5)
|
||||||
|
)
|
||||||
|
|
||||||
|
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
||||||
|
print(summary_table)
|
||||||
|
|
||||||
|
# Save results to CSV
|
||||||
|
# write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
||||||
|
# cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||||
171
eohi1/correlations - scales.r
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(corrplot)
|
||||||
|
library(Hmisc)
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp1_data <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
# Define the two sets of variables
|
||||||
|
set1_vars <- c("eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||||
|
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean")
|
||||||
|
set2_vars <- c("AOT_total", "CRT_correct", "CRT_int")
|
||||||
|
|
||||||
|
# Create subset with only the variables of interest
|
||||||
|
correlation_data <- exp1_data[, c(set1_vars, set2_vars)]
|
||||||
|
|
||||||
|
# ===== NORMALITY CHECKS =====
|
||||||
|
# Shapiro-Wilk tests for normality
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
shapiro_result <- shapiro.test(correlation_data[[var]])
|
||||||
|
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
|
||||||
|
var, shapiro_result$p.value,
|
||||||
|
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Visual normality checks
|
||||||
|
pdf("normality_plots.pdf", width = 12, height = 8)
|
||||||
|
par(mfrow = c(2, 4))
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
# Histogram with normal curve overlay
|
||||||
|
hist(correlation_data[[var]], main = paste("Histogram:", var),
|
||||||
|
xlab = var, freq = FALSE)
|
||||||
|
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE)),
|
||||||
|
add = TRUE, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Q-Q plot
|
||||||
|
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
|
||||||
|
qqline(correlation_data[[var]], col = "red", lwd = 2)
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# ===== LINEARITY CHECKS =====
|
||||||
|
# Check linearity between variable pairs
|
||||||
|
pdf("linearity_plots.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(3, 5))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
# Scatter plot with regression line
|
||||||
|
plot(correlation_data[[var1]], correlation_data[[var2]],
|
||||||
|
main = paste(var1, "vs", var2),
|
||||||
|
xlab = var1, ylab = var2, pch = 16, cex = 0.6)
|
||||||
|
|
||||||
|
# Add linear regression line
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
abline(lm_fit, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add LOESS smooth line for non-linear pattern detection
|
||||||
|
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
|
||||||
|
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
|
||||||
|
loess_pred <- predict(loess_fit, x_seq)
|
||||||
|
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
|
||||||
|
|
||||||
|
# Calculate R-squared for linear fit
|
||||||
|
r_squared <- summary(lm_fit)$r.squared
|
||||||
|
cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Residual analysis for linearity
|
||||||
|
pdf("residual_plots.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(3, 5))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
residuals <- residuals(lm_fit)
|
||||||
|
fitted <- fitted(lm_fit)
|
||||||
|
|
||||||
|
plot(fitted, residuals,
|
||||||
|
main = paste("Residuals:", var1, "vs", var2),
|
||||||
|
xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
|
||||||
|
abline(h = 0, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add smooth line to residuals
|
||||||
|
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate correlation matrix (Spearman only)
|
||||||
|
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
|
||||||
|
|
||||||
|
# Print correlation matrix with 5 decimal places
|
||||||
|
print(round(cor_matrix_spearman, 5))
|
||||||
|
|
||||||
|
# Separate correlations between the two sets (Spearman)
|
||||||
|
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
|
||||||
|
print(round(set1_set2_cor, 5))
|
||||||
|
|
||||||
|
# Calculate correlations within each set (Spearman)
|
||||||
|
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
|
||||||
|
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
|
||||||
|
|
||||||
|
# Statistical significance tests (Spearman)
|
||||||
|
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
|
||||||
|
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create correlation plot (Spearman only)
|
||||||
|
pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_spearman, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Summary statistics
|
||||||
|
desc_stats <- describe(correlation_data)
|
||||||
|
print(round(desc_stats, 5))
|
||||||
|
|
||||||
|
# Save results to CSV files
|
||||||
|
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv")
|
||||||
|
write.csv(round(desc_stats, 5), "descriptive_statistics.csv")
|
||||||
|
|
||||||
|
# Save correlation results in a formatted table
|
||||||
|
cor_results <- data.frame(
|
||||||
|
Variable1 = character(),
|
||||||
|
Variable2 = character(),
|
||||||
|
Spearman_r = numeric(),
|
||||||
|
P_value = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract significant correlations between sets
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
r_val <- cor_matrix_spearman[var1, var2]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
|
||||||
|
cor_results <- rbind(cor_results, data.frame(
|
||||||
|
Variable1 = var1,
|
||||||
|
Variable2 = var2,
|
||||||
|
Spearman_r = r_val,
|
||||||
|
P_value = p_val,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write.csv(cor_results, "correlation_exp1.csv", row.names = FALSE)
|
||||||
45
eohi1/dataP 02 - cor means average over time frames.r
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp1_data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Define all NPastDiff and NFutDiff variables
|
||||||
|
all_diff_vars <- c(
|
||||||
|
"NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
|
||||||
|
"NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable", "NPastDiff_pers_anxious", "NPastDiff_pers_complex",
|
||||||
|
"NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
|
||||||
|
"NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied", "NPastDiff_life_important", "NPastDiff_life_change",
|
||||||
|
"NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel",
|
||||||
|
"NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable", "NFutDiff_pers_anxious", "NFutDiff_pers_complex",
|
||||||
|
"NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice",
|
||||||
|
"NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied", "NFutDiff_life_important", "NFutDiff_life_change"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define DGEN variables to average
|
||||||
|
dgen_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
|
||||||
|
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
|
||||||
|
|
||||||
|
# Calculate domain_mean as average of all 40 variables
|
||||||
|
exp1_data$domain_mean <- rowMeans(exp1_data[, all_diff_vars], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Calculate DGEN_mean as average of all 8 DGEN variables
|
||||||
|
exp1_data$DGEN_mean <- rowMeans(exp1_data[, dgen_vars], na.rm = TRUE)
|
||||||
|
|
||||||
|
# Save the updated data
|
||||||
|
write.csv(exp1_data, "exp1.csv", row.names = FALSE)
|
||||||
|
|
||||||
|
# Display summary of the calculated means
|
||||||
|
cat("Domain mean summary (average of all 40 NPastDiff and NFutDiff variables):\n")
|
||||||
|
summary(exp1_data$domain_mean)
|
||||||
|
|
||||||
|
cat("\nDGEN mean summary (average of all 8 DGEN variables):\n")
|
||||||
|
summary(exp1_data$DGEN_mean)
|
||||||
|
|
||||||
|
# Show first few rows to verify calculations
|
||||||
|
cat("\nFirst 5 rows of calculated means:\n")
|
||||||
|
print(exp1_data[1:5, c("domain_mean", "DGEN_mean")])
|
||||||
|
|
||||||
|
# Show the individual DGEN values for first 5 rows to verify math
|
||||||
|
cat("\nFirst 5 rows of individual DGEN values for verification:\n")
|
||||||
|
print(exp1_data[1:5, dgen_vars])
|
||||||
104
eohi1/datap 03 - CORRECT domain specific EHI vars.r
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load data
ehi1 <- read.csv("ehi1.csv")

# Create EHI difference variables (NPast - NFut) for every item.
# The item suffixes are listed exactly once so that the creation code and
# the QA code below cannot drift out of sync (the original spelled each of
# the 20 items out twice: once for the subtraction, once in qa_pairs).
item_suffixes <- c(
  # Preferences
  "pref_read", "pref_music", "pref_tv", "pref_nap", "pref_travel",
  # Personality
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  # Values
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice",
  # Life satisfaction
  "life_ideal", "life_excellent", "life_satisfied", "life_important", "life_change"
)

# ehi_<item> = NPastDiff_<item> - NFutDiff_<item>
for (sfx in item_suffixes) {
  npast_col <- paste0("NPastDiff_", sfx)
  nfut_col  <- paste0("NFutDiff_", sfx)
  ehi1[[paste0("ehi_", sfx)]] <- ehi1[[npast_col]] - ehi1[[nfut_col]]
}

# QA: Verify calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI difference calculations (NPast - NFut)\n\n")

all_checks_passed <- TRUE

for (sfx in item_suffixes) {
  npast  <- paste0("NPastDiff_", sfx)
  nfut   <- paste0("NFutDiff_", sfx)
  target <- paste0("ehi_", sfx)

  # Recompute the expected difference independently of the stored column
  expected_diff <- ehi1[[npast]] - ehi1[[nfut]]

  # Get actual value in target variable
  actual_value <- ehi1[[target]]

  # Compare (allowing for floating point precision issues)
  discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", target))
    cat(sprintf("  Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))

    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf("  Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
                row_num,
                npast, ehi1[[npast]][row_num],
                nfut, ehi1[[nfut]][row_num],
                expected_diff[row_num],
                target, actual_value[row_num]))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", target, nrow(ehi1)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset
write.csv(ehi1, "ehi1.csv", row.names = FALSE)
cat("\nDataset saved to ehi1.csv\n")
|
||||||
167
eohi1/datap 04 - CORRECT ehi var means.r
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load data
ehi1 <- read.csv("ehi1.csv")

# Single source of truth for the EHI item groupings. The same lists drive
# the mean calculation, the first-5-rows spot checks, and the all-rows QA
# check below (the original repeated each variable list three times, which
# invited drift between the calculation and its verification).
ehi_groups <- list(
  "Preferences" = c("ehi_pref_read", "ehi_pref_music", "ehi_pref_tv",
                    "ehi_pref_nap", "ehi_pref_travel"),
  "Personality" = c("ehi_pers_extravert", "ehi_pers_critical", "ehi_pers_dependable",
                    "ehi_pers_anxious", "ehi_pers_complex"),
  "Values" = c("ehi_val_obey", "ehi_val_trad", "ehi_val_opinion",
               "ehi_val_performance", "ehi_val_justice"),
  "Life Satisfaction" = c("ehi_life_ideal", "ehi_life_excellent", "ehi_life_satisfied",
                          "ehi_life_important", "ehi_life_change")
)
# Global group = all 20 items across the four domains
ehi_groups[["Global"]] <- unname(unlist(ehi_groups))

# Output column written for each group's mean
group_targets <- c(
  "Preferences"       = "ehi_pref_mean",
  "Personality"       = "ehi_pers_mean",
  "Values"            = "ehi_val_mean",
  "Life Satisfaction" = "ehi_life_mean",
  "Global"            = "ehi_global_mean"
)

# Calculate mean scores for EHI variables (row means, NAs ignored)
for (g in names(ehi_groups)) {
  ehi1[[group_targets[[g]]]] <- rowMeans(ehi1[, ehi_groups[[g]]], na.rm = TRUE)
}

# QA: Verify mean calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI mean calculations\n\n")

# Spot-check the first 5 rows of every group mean against an independent
# recomputation with mean().
first_group <- TRUE
for (g in names(ehi_groups)) {
  vars_g <- ehi_groups[[g]]
  header <- if (g == "Global") "GLOBAL MEAN (20 variables)" else paste(toupper(g), "MEAN")
  cat(sprintf("%s--- FIRST 5 ROWS: %s ---\n", if (first_group) "" else "\n", header))
  first_group <- FALSE

  for (i in 1:5) {
    vals <- as.numeric(ehi1[i, vars_g])
    calc_mean <- mean(vals, na.rm = TRUE)
    actual_mean <- ehi1[[group_targets[[g]]]][i]
    match <- abs(calc_mean - actual_mean) < 1e-10
    if (g == "Global") {
      # Too many values to list individually for the 20-item global mean
      cat(sprintf("Row %d: 20 values → Calculated: %.5f | Actual: %.5f %s\n",
                  i, calc_mean, actual_mean, ifelse(match, "✓", "✗")))
    } else {
      cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
                  i, vals[1], vals[2], vals[3], vals[4], vals[5],
                  calc_mean, actual_mean, ifelse(match, "✓", "✗")))
    }
  }
}

# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

all_checks_passed <- TRUE

for (g in names(ehi_groups)) {
  calc_mean <- rowMeans(ehi1[, ehi_groups[[g]]], na.rm = TRUE)
  actual_mean <- ehi1[[group_targets[[g]]]]
  # Allow for floating point precision issues
  discrepancies <- which(abs(calc_mean - actual_mean) > 1e-10)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s mean (n_vars = %d)\n", g, length(ehi_groups[[g]])))
    cat(sprintf("  Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                g, length(ehi_groups[[g]]), nrow(ehi1)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset
write.csv(ehi1, "ehi1.csv", row.names = FALSE)
cat("\nDataset saved to ehi1.csv\n")
|
||||||
38
eohi1/datap 15 - education recoded 3 ordinal levels.r
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

data <- read.csv("ehi1.csv")

# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))

# Also show the unique values and their frequencies.
# BUG FIX: the original used print("\nUnique values and frequencies:"),
# which renders the escape literally ([1] "\nUnique ..."); cat() emits the
# intended blank line instead.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))

# Recode demo_edu into 3 ordinal levels; anything not matched below stays NA
data$edu3 <- NA

# HS_TS: High School and Trade School
data$edu3[data$demo_edu %in% c("High School (or equivalent)", "Trade School (non-military)")] <- "HS_TS"

# C_Ug: College and University - Undergraduate
data$edu3[data$demo_edu %in% c("College Diploma/Certificate", "University - Undergraduate")] <- "C_Ug"

# grad_prof: University - Graduate, University - PhD, and Professional Degree
data$edu3[data$demo_edu %in% c("University - Graduate (Masters)", "University - PhD", "Professional Degree (ex. JD/MD)")] <- "grad_prof"

# Convert to ordered factor (HS_TS < C_Ug < grad_prof)
data$edu3 <- factor(data$edu3,
                    levels = c("HS_TS", "C_Ug", "grad_prof"),
                    ordered = TRUE)

# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))

# Verify the recoding: cross-tab of original vs recoded categories
print(table(data$demo_edu, data$edu3, useNA = "ifany"))

# Save the updated dataset with the new edu3 variable
write.csv(data, "ehi1.csv", row.names = FALSE)
|
||||||
14
eohi1/descriptive_statistics.csv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
|
||||||
|
"eohiDGEN_pref",1,1063,0.31138,2.29712,0,0.27497,1.4826,-10,10,20,0.16174,2.49869,0.07046
|
||||||
|
"eohiDGEN_pers",2,1063,0.44309,2.40063,0,0.39365,1.4826,-10,10,20,0.04618,2.20622,0.07363
|
||||||
|
"eohiDGEN_val",3,1063,0.49012,2.38159,0,0.3772,1.4826,-10,10,20,0.37348,2.6545,0.07305
|
||||||
|
"eohiDGEN_life",4,1063,0.41016,2.72624,0,0.3866,1.4826,-10,10,20,0.06705,1.55144,0.08362
|
||||||
|
"eohiDGEN_mean",5,1063,0.41488,1.7406,0,0.34323,0.99334,-9,9.33,18.33,0.43898,2.96172,0.05339
|
||||||
|
"ehi_pref_mean",6,1063,0.13885,0.64266,0,0.09683,0.29652,-2.2,3.2,5.4,0.93756,3.43951,0.01971
|
||||||
|
"ehi_pers_mean",7,1063,0.09614,0.71979,0,0.07027,0.59304,-3.6,3.8,7.4,0.53524,3.53968,0.02208
|
||||||
|
"ehi_val_mean",8,1063,0.19699,0.72241,0,0.14266,0.29652,-3.4,4.4,7.8,1.17086,5.08196,0.02216
|
||||||
|
"ehi_life_mean",9,1063,0.1127,1.12639,0,0.06792,0.59304,-4.8,6,10.8,0.45879,2.97822,0.03455
|
||||||
|
"ehi_global_mean",10,1063,0.13617,0.50405,0.05,0.09871,0.37065,-1.65,2.65,4.3,1.09805,3.7645,0.01546
|
||||||
|
"AOT_total",11,1063,0.6945,0.60845,0.75,0.69859,0.7413,-1.125,2,3.125,-0.09166,-0.61789,0.01866
|
||||||
|
"CRT_correct",12,1063,0.3132,0.36479,0.33,0.26666,0.48926,0,1,1,0.79518,-0.77639,0.01119
|
||||||
|
"CRT_int",13,1063,0.614,0.36086,0.67,0.6424,0.48926,0,1,1,-0.45012,-1.11195,0.01107
|
||||||
|
107
eohi1/descriptives - gen knowledge questions.r
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
library(tidyverse)
library(ggplot2)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read data
data <- read.csv("exp1.csv")

# Select variables ending exactly with _T or _F (general-knowledge items)
df <- data %>% select(matches("(_T|_F)$"))

# Remove demo_f variable (if present)
df <- df %>% select(-any_of("demo_f"))

str(df)

# Coerce to numeric where possible (without breaking non-numeric)
df_num <- df %>%
  mutate(across(everything(), ~ suppressWarnings(as.numeric(.))))

# Expected accuracy band for each nominal difficulty level.
# Hoisted out of the per-column function below: it is loop-invariant, so
# there is no reason to rebuild the list for every variable.
expected_ranges <- list(
  "15" = c(0.15, 0.25),
  "35" = c(0.35, 0.45),
  "55" = c(0.55, 0.65),
  "75" = c(0.75, 0.85)
)

# Compute count and proportion correct per variable
descriptives <- purrr::imap_dfr(df_num, function(col, name) {
  x <- suppressWarnings(as.numeric(col))
  x <- x[!is.na(x)]
  n_total <- length(x)
  n_correct <- if (n_total == 0) NA_integer_ else sum(x == 1)
  prop <- if (n_total == 0) NA_real_ else n_correct / n_total

  # Extract difficulty number from the variable name (e.g. "gest_75_T" -> 75).
  # suppressWarnings: a non-matching name leaves the full string, and
  # as.numeric then yields NA without a coercion warning.
  difficulty_num <- suppressWarnings(as.numeric(gsub(".*_([0-9]+)_[TF]$", "\\1", name)))

  # BUG FIX: if the column is all-NA, prop is NA_real_ and the original
  # `if (prop >= expected_range[1] && ...)` aborted with
  # "missing value where TRUE/FALSE needed". Guard on is.na(prop) first.
  if (is.na(prop) || !(as.character(difficulty_num) %in% names(expected_ranges))) {
    match_difficulty <- "UNKNOWN"
  } else {
    expected_range <- expected_ranges[[as.character(difficulty_num)]]
    match_difficulty <- if (prop >= expected_range[1] && prop <= expected_range[2]) "YES" else "NO"
  }

  tibble(
    variable = name,
    n_total = n_total,
    n_correct = n_correct,
    prop_correct = round(prop, 5),
    match_difficulty = match_difficulty
  )
}) %>%
  arrange(variable)

# Bin proportions into .10-.19, .20-.29, ..., .90-.99 and count variables per bin
bin_levels <- sapply(1:9, function(k) sprintf("%.2f-%.2f", k / 10, k / 10 + 0.09))
bin_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(0.10, 1.00, by = 0.10),
  right = FALSE,
  include.lowest = FALSE,
  labels = bin_levels
)
bin_counts <- tibble(bin = factor(bin_factor, levels = bin_levels)) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")

# Additional bins: 0.15-0.24, 0.25-0.34, ..., 0.85-0.94
bin15_levels <- sapply(seq(0.15, 0.85, by = 0.10), function(lo) sprintf("%.2f-%.2f", lo, lo + 0.09))
bin15_factor <- cut(
  descriptives$prop_correct,
  breaks = seq(0.15, 0.95, by = 0.10),
  right = FALSE,
  include.lowest = FALSE,
  labels = bin15_levels
)
bin15_counts <- tibble(bin = factor(bin15_factor, levels = bin15_levels)) %>%
  group_by(bin) %>%
  summarise(num_variables = n(), .groups = "drop")

# View
print(descriptives, n = Inf)
cat("\nBin counts (.10-.19, .20-.29, ..., .90-.99):\n")
print(bin_counts, n = Inf)
cat("\nBin counts (0.15-0.24, 0.25-0.34, ..., 0.85-0.94):\n")
print(bin15_counts, n = Inf)

# Histogram of proportion correct with custom bins
histogram <- ggplot(descriptives, aes(x = prop_correct)) +
  geom_histogram(
    breaks = seq(0.15, 0.95, by = 0.10),
    fill = "lightblue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Distribution of Proportion Correct",
    x = "Proportion Correct",
    y = "Number of Variables"
  ) +
  theme_minimal() +
  scale_x_continuous(breaks = seq(0.15, 0.95, by = 0.10))

print(histogram)

# Optionally save
# readr::write_csv(descriptives, "exp1_TF_descriptives.csv")
|
||||||
88
eohi1/e1 - reliability ehi.r
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
# Reliability analysis of the two EOHI summary scores (eohiDGEN_mean and
# ehi_global_mean): Cronbach's alpha, split-half reliability, and a
# correlation-based two-item summary, written out as an HTML report.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

options(scipen = 999)

df <- read.csv("ehi1.csv")
library(psych)
library(knitr)

# fixed-decimal formatter (five decimals)
fmt5 <- function(x) formatC(x, format = "f", digits = 5)

# Select the two variables for reliability analysis, keeping only rows
# complete on both (note: despite the original "4 variables" comment,
# only these two columns are used).
reliability_data <- df[complete.cases(df[, c("eohiDGEN_mean", "ehi_global_mean")]),
                       c("eohiDGEN_mean", "ehi_global_mean")]

# Cronbach's Alpha (psych::alpha; with k = 2 items this equals the
# Spearman-Brown value computed explicitly below)
alpha_result <- alpha(reliability_data)

# Split-half reliability (psych::splitHalf)
split_half <- splitHalf(reliability_data)

# Inter-item correlations
cor_matrix <- cor(reliability_data, use = "complete.obs")

# Two-item reliability summary (if applicable)
two_item_section <- ""
if (ncol(reliability_data) == 2) {
  n_complete <- sum(complete.cases(reliability_data))
  r_pearson <- cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "pearson")
  r_spearman <- suppressWarnings(cor(reliability_data[, 1], reliability_data[, 2], use = "complete.obs", method = "spearman"))
  # Fisher z CI for r: z = atanh(r), SE = 1/sqrt(n-3), back-transform with tanh
  fisher_z <- atanh(r_pearson)
  se_z <- 1 / sqrt(n_complete - 3)
  z_crit <- qnorm(0.975)
  ci_z_lower <- fisher_z - z_crit * se_z
  ci_z_upper <- fisher_z + z_crit * se_z
  ci_r_lower <- tanh(ci_z_lower)
  ci_r_upper <- tanh(ci_z_upper)
  # Approximate Fisher z CI for Spearman rho (large-sample approximation;
  # reuses the Pearson SE, which is only an approximation for rho)
  fisher_z_s <- atanh(r_spearman)
  ci_zs_lower <- fisher_z_s - z_crit * se_z
  ci_zs_upper <- fisher_z_s + z_crit * se_z
  ci_s_lower <- tanh(ci_zs_lower)
  ci_s_upper <- tanh(ci_zs_upper)
  # Spearman–Brown/Cronbach's alpha for k = 2: alpha = 2r / (1 + r);
  # the CI endpoints are transformed through the same formula
  alpha_sb <- (2 * r_pearson) / (1 + r_pearson)
  alpha_lower <- (2 * ci_r_lower) / (1 + ci_r_lower)
  alpha_upper <- (2 * ci_r_upper) / (1 + ci_r_upper)

  # intToUtf8(945) is the Greek letter alpha for the HTML output
  two_item_section <- paste0(
    "<h2>Two-Item Reliability Summary</h2>",
    "<p>Pearson r: ", fmt5(r_pearson), " (95% CI: [", fmt5(ci_r_lower), ", ", fmt5(ci_r_upper), "])</p>",
    "<p>Spearman r: ", fmt5(r_spearman), " (95% CI: [", fmt5(ci_s_lower), ", ", fmt5(ci_s_upper), "])</p>",
    "<p>Spearman–Brown / Cronbach's ", intToUtf8(945), ": ", fmt5(alpha_sb),
    " (95% CI: [", fmt5(alpha_lower), ", ", fmt5(alpha_upper), "])</p>"
  )
}

# Create a summary table of per-variable descriptives.
# NOTE(review): summary_table (and cor_matrix above) are computed but never
# included in html_output below — confirm whether they should be added to
# the report or removed.
summary_table <- data.frame(
  Variable = names(reliability_data),
  n = nrow(reliability_data),
  Mean = round(colMeans(reliability_data, na.rm = TRUE), 5),
  SD = round(apply(reliability_data, 2, sd, na.rm = TRUE), 5),
  Min = round(apply(reliability_data, 2, min, na.rm = TRUE), 5),
  Max = round(apply(reliability_data, 2, max, na.rm = TRUE), 5),
  Median = round(apply(reliability_data, 2, median, na.rm = TRUE), 5)
)

# Assemble the HTML report from the pieces computed above
html_output <- paste0(
  "<html><head><title>EHI Reliability Analysis</title></head><body>",
  "<h1>EHI Reliability Analysis</h1>",
  two_item_section,

  "<h2>Cronbach's Alpha</h2>",
  kable(alpha_result$total, format = "html"),

  "<h2>Split-Half Reliability</h2>",
  "<p>Maximum split half reliability: ", fmt5(split_half$maxrb), "</p>",

  "<h2>Item-Level Statistics</h2>",
  kable(alpha_result$item.stats, format = "html"),

  "</body></html>"
)

# Write the report next to the data
writeLines(html_output, "EHI reliability.html")
|
||||||
BIN
eohi1/education_DGEN_means.png
Normal file
|
After Width: | Height: | Size: 177 KiB |
BIN
eohi1/education_domain_means.png
Normal file
|
After Width: | Height: | Size: 176 KiB |
1064
eohi1/ehi1 - Copy.csv
Normal file
1064
eohi1/ehi1.csv
Normal file
81
eohi1/eohi_calibration_correlations_summary.csv
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
eohi_var,cal_var,n,pearson_r,pearson_ci_lower,pearson_ci_upper,pearson_p,spearman_rho,spearman_ci_lower,spearman_ci_upper,spearman_p
|
||||||
|
eohi_mean,cal_selfActual,1063,-0.09447,-0.15019,-0.0362,0.00205,-0.11288,-0.1699,-0.05351,0.00023
|
||||||
|
eohi_val,cal_selfActual,1063,-0.09384,-0.15347,-0.03616,0.00219,-0.09963,-0.16267,-0.04046,0.00114
|
||||||
|
eohi_pers,cal_selfActual,1063,-0.07652,-0.12898,-0.01613,0.01258,-0.08281,-0.14357,-0.02535,0.00691
|
||||||
|
eohiDGEN_pref,cal_15,1063,0.11449,0.04892,0.17239,0.00018,0.08071,0.01776,0.13995,0.00847
|
||||||
|
eohi_pers,cal_55,1063,-0.06004,-0.1185,-0.0065,0.05036,-0.07699,-0.13816,-0.01519,0.01204
|
||||||
|
eohi_mean,cal_55,1063,-0.06632,-0.11782,-0.01552,0.03061,-0.07588,-0.13634,-0.01868,0.01333
|
||||||
|
eohiDGEN_val,cal_55,1063,-0.05006,-0.1073,0.00598,0.10281,-0.06964,-0.12753,-0.01077,0.02318
|
||||||
|
eohiDGEN_mean,cal_true,1063,0.05754,-0.0009,0.11652,0.06074,0.0676,0.00725,0.12602,0.02754
|
||||||
|
eohiDGEN_life,cal_75,1063,-0.04564,-0.10516,0.01314,0.13702,-0.05638,-0.11573,0.00538,0.06615
|
||||||
|
eohi_life,cal_75,1063,-0.03875,-0.09577,0.01879,0.20683,-0.05565,-0.11564,0.00491,0.06974
|
||||||
|
eohiDGEN_mean,cal_15,1063,0.07628,0.01598,0.13615,0.01285,0.05395,-0.00598,0.11493,0.0787
|
||||||
|
eohi_val,cal_55,1063,-0.05666,-0.10898,-0.00182,0.06479,-0.05346,-0.10885,0.00265,0.0815
|
||||||
|
eohiDGEN_life,cal_55,1063,0.05044,-0.00773,0.10614,0.10026,0.04956,-0.00878,0.11087,0.10635
|
||||||
|
eohiDGEN_pref,cal_true,1063,0.07292,0.0095,0.134,0.01741,0.04803,-0.0094,0.108,0.11761
|
||||||
|
eohi_pref,cal_selfActual,1063,-0.03281,-0.09762,0.032,0.28519,-0.04423,-0.10373,0.01867,0.14958
|
||||||
|
eohiDGEN_val,cal_false,1063,-0.02787,-0.09239,0.03411,0.36398,-0.04393,-0.10451,0.01416,0.15236
|
||||||
|
eohiDGEN_pers,cal_15,1063,0.04714,-0.01715,0.10944,0.12451,0.04308,-0.01746,0.10317,0.16046
|
||||||
|
eohiDGEN_life,cal_false,1063,0.03476,-0.02508,0.09492,0.25753,0.04047,-0.02102,0.10411,0.18735
|
||||||
|
eohi_life,cal_true,1063,-0.02269,-0.08192,0.0416,0.45982,-0.03779,-0.0985,0.01964,0.21833
|
||||||
|
eohiDGEN_life,cal_15,1063,0.05091,-0.01796,0.11623,0.09709,0.03755,-0.02524,0.09835,0.22121
|
||||||
|
eohiDGEN_life,cal_true,1063,-0.00526,-0.06612,0.05833,0.86395,-0.03744,-0.09633,0.02643,0.22263
|
||||||
|
eohiDGEN_life,cal_selfActual,1063,0.02779,-0.03579,0.0868,0.36538,0.03688,-0.02625,0.10029,0.22962
|
||||||
|
eohi_val,cal_75,1063,-0.03432,-0.09303,0.02511,0.26362,-0.03548,-0.09664,0.02788,0.24775
|
||||||
|
eohiDGEN_val,cal_selfActual,1063,0.02114,-0.04078,0.08546,0.49105,0.03547,-0.02636,0.09663,0.24789
|
||||||
|
eohiDGEN_pers,cal_true,1063,0.02354,-0.03617,0.08422,0.44334,0.03532,-0.02544,0.09158,0.24994
|
||||||
|
eohiDGEN_pers,cal_35,1063,-0.03495,-0.09252,0.02312,0.25493,-0.03489,-0.09183,0.02535,0.25578
|
||||||
|
eohiDGEN_pers,cal_75,1063,-0.01611,-0.07516,0.04168,0.59971,-0.03379,-0.08928,0.02687,0.271
|
||||||
|
eohiDGEN_pers,cal_false,1063,-0.02503,-0.08151,0.03334,0.4149,-0.03266,-0.08708,0.02624,0.28745
|
||||||
|
eohiDGEN_val,cal_global,1063,-0.00447,-0.05803,0.05482,0.8843,-0.03204,-0.08825,0.0252,0.29666
|
||||||
|
eohiDGEN_mean,cal_55,1063,-0.03699,-0.08943,0.01668,0.2282,-0.02929,-0.08473,0.03097,0.34006
|
||||||
|
eohiDGEN_pers,cal_selfActual,1063,-0.03827,-0.10619,0.02517,0.21254,-0.02835,-0.09145,0.03191,0.35572
|
||||||
|
eohi_life,cal_selfActual,1063,0.04737,-0.01652,0.10847,0.12274,0.02825,-0.03176,0.08666,0.35753
|
||||||
|
eohi_mean,cal_75,1063,-0.01187,-0.06715,0.04855,0.69916,-0.0275,-0.0859,0.03279,0.37045
|
||||||
|
eohiDGEN_val,cal_15,1063,0.00914,-0.04676,0.06618,0.76607,-0.02707,-0.08456,0.02837,0.37789
|
||||||
|
eohi_pref,cal_55,1063,-0.02632,-0.08106,0.02643,0.39136,-0.02524,-0.08705,0.03558,0.41101
|
||||||
|
eohiDGEN_pref,cal_35,1063,-0.00852,-0.0681,0.0469,0.78147,-0.02501,-0.08136,0.03028,0.41525
|
||||||
|
eohi_val,cal_global,1063,-0.03245,-0.08628,0.02097,0.29049,-0.02445,-0.0837,0.03405,0.42589
|
||||||
|
eohiDGEN_pref,cal_global,1063,0.05135,-0.00854,0.10781,0.09423,0.02438,-0.03698,0.07747,0.42716
|
||||||
|
eohi_mean,cal_global,1063,-0.00987,-0.0669,0.04625,0.74782,-0.02423,-0.08093,0.03538,0.43002
|
||||||
|
eohi_mean,cal_false,1063,0.00103,-0.05609,0.05796,0.97329,-0.02413,-0.08528,0.034,0.43184
|
||||||
|
eohi_pers,cal_15,1063,0.0424,-0.01973,0.10006,0.16717,0.02377,-0.03499,0.07967,0.4388
|
||||||
|
eohiDGEN_pref,cal_75,1063,0.04194,-0.01865,0.10462,0.17182,0.02253,-0.03687,0.08394,0.46315
|
||||||
|
eohi_val,cal_35,1063,0.00919,-0.04488,0.06395,0.76478,-0.02132,-0.07724,0.0359,0.48748
|
||||||
|
eohiDGEN_mean,cal_75,1063,0.02182,-0.03779,0.08058,0.47734,0.02086,-0.0387,0.08009,0.49684
|
||||||
|
eohi_val,cal_15,1063,-0.0114,-0.06675,0.04342,0.71036,0.02045,-0.0384,0.08265,0.50533
|
||||||
|
eohiDGEN_mean,cal_global,1063,0.01728,-0.04015,0.07341,0.5736,0.01974,-0.03874,0.07654,0.52029
|
||||||
|
eohiDGEN_val,cal_true,1063,0.03211,-0.02651,0.08716,0.29554,0.01936,-0.04555,0.08179,0.52834
|
||||||
|
eohi_mean,cal_15,1063,0.02437,-0.03634,0.08546,0.42732,0.01772,-0.03986,0.07648,0.56377
|
||||||
|
eohiDGEN_pers,cal_55,1063,-0.02109,-0.07397,0.03478,0.49211,-0.0175,-0.0759,0.04546,0.56869
|
||||||
|
eohiDGEN_mean,cal_selfActual,1063,0.00131,-0.05794,0.06413,0.96594,0.0169,-0.04525,0.07532,0.582
|
||||||
|
eohi_life,cal_false,1063,0.02929,-0.02351,0.0794,0.34004,0.01689,-0.04339,0.07467,0.58238
|
||||||
|
eohiDGEN_mean,cal_false,1063,-0.02032,-0.07402,0.03873,0.5082,-0.01576,-0.07583,0.04145,0.60773
|
||||||
|
eohiDGEN_pref,cal_55,1063,-0.01025,-0.064,0.04642,0.73848,-0.01542,-0.0708,0.046,0.61559
|
||||||
|
eohi_life,cal_35,1063,0.03599,-0.02203,0.08892,0.24108,0.01538,-0.04474,0.07414,0.61636
|
||||||
|
eohi_pers,cal_false,1063,-0.00935,-0.06513,0.04644,0.76086,-0.01485,-0.07336,0.04336,0.62859
|
||||||
|
eohi_life,cal_15,1063,0.0156,-0.0452,0.07483,0.61145,0.01432,-0.04315,0.07438,0.64092
|
||||||
|
eohi_pers,cal_true,1063,0.0055,-0.05876,0.07464,0.85794,-0.01291,-0.06976,0.04539,0.67419
|
||||||
|
eohi_pref,cal_75,1063,0.02725,-0.03153,0.08793,0.37474,0.01264,-0.0447,0.07576,0.68049
|
||||||
|
eohi_mean,cal_true,1063,-0.01786,-0.07676,0.04265,0.56079,-0.01207,-0.07091,0.05063,0.69435
|
||||||
|
eohi_val,cal_false,1063,-0.01276,-0.07529,0.04177,0.6777,-0.0115,-0.0741,0.04742,0.7081
|
||||||
|
eohi_pers,cal_35,1063,0.01428,-0.04037,0.07111,0.6418,0.01092,-0.04486,0.06885,0.7222
|
||||||
|
eohiDGEN_pers,cal_global,1063,-0.00723,-0.06312,0.05198,0.81387,-0.01039,-0.06483,0.05266,0.73501
|
||||||
|
eohi_mean,cal_35,1063,0.02233,-0.03257,0.08046,0.46697,-0.01029,-0.06915,0.04512,0.73748
|
||||||
|
eohi_pref,cal_global,1063,0.0183,-0.03841,0.07643,0.55118,0.00972,-0.0475,0.07007,0.75165
|
||||||
|
eohi_pers,cal_global,1063,-0.00467,-0.06118,0.05112,0.87911,-0.00916,-0.06635,0.05143,0.76546
|
||||||
|
eohiDGEN_pref,cal_selfActual,1063,0.02085,-0.03875,0.07863,0.49712,0.00839,-0.04939,0.06846,0.78474
|
||||||
|
eohi_val,cal_true,1063,-0.0358,-0.0949,0.02654,0.24352,-0.0081,-0.07342,0.05105,0.79199
|
||||||
|
eohi_pref,cal_15,1063,0.02312,-0.04369,0.0815,0.45148,0.00755,-0.05412,0.06986,0.80582
|
||||||
|
eohi_pref,cal_false,1063,0.02725,-0.03088,0.08754,0.37477,0.00736,-0.05157,0.06691,0.8106
|
||||||
|
eohiDGEN_pref,cal_false,1063,0.00873,-0.04608,0.05948,0.77614,0.00453,-0.05188,0.05691,0.88276
|
||||||
|
eohi_pref,cal_35,1063,0.02663,-0.03409,0.08569,0.38574,0.00424,-0.05505,0.06505,0.89016
|
||||||
|
eohiDGEN_life,cal_global,1063,0.02651,-0.03474,0.08515,0.38781,0.00414,-0.06008,0.06383,0.89275
|
||||||
|
eohiDGEN_val,cal_35,1063,0.0047,-0.04819,0.06294,0.87827,-0.00387,-0.06182,0.05569,0.89972
|
||||||
|
eohi_pers,cal_75,1063,-0.01501,-0.08009,0.05265,0.62493,-0.00292,-0.06324,0.06169,0.92424
|
||||||
|
eohi_life,cal_global,1063,0.01135,-0.04298,0.06703,0.71158,-0.00291,-0.0638,0.0565,0.92459
|
||||||
|
eohiDGEN_life,cal_35,1063,0.01256,-0.04397,0.071,0.68263,-0.00226,-0.06355,0.06168,0.94137
|
||||||
|
eohi_life,cal_55,1063,0.0154,-0.0459,0.07265,0.61609,0.00223,-0.05962,0.05937,0.94216
|
||||||
|
eohiDGEN_mean,cal_35,1063,-0.01765,-0.07148,0.0442,0.5654,-0.0016,-0.05665,0.05409,0.95856
|
||||||
|
eohiDGEN_val,cal_75,1063,0.02365,-0.04062,0.08878,0.44107,-0.00147,-0.06785,0.06103,0.96186
|
||||||
|
eohi_pref,cal_true,1063,-0.00826,-0.06273,0.05423,0.78791,0.00098,-0.05588,0.06502,0.97445
|
||||||
|
BIN
eohi1/eohi_process.xlsm
Normal file
1073
eohi1/exp1.csv
Normal file
33
eohi1/exp1_TF_descriptives.csv
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
variable,accuracy,TF,n_total,n_correct,prop_correct,match_difficulty,Table A1,Table A2,match_difficulty,,,,,,,
|
||||||
|
gest_75_T,75,T,1063,952,0.89558,NO,0.75,0.9,M,,,,,,,
|
||||||
|
dors_55_T,55,T,1063,949,0.89276,NO,0.64,0.96,M,,,,,,,
|
||||||
|
chur_75_T,75,T,1063,947,0.89087,NO,0.83,0.9,M,,,,,,,
|
||||||
|
mons_55_T,55,T,1063,944,0.88805,NO,0.55,0.82,H,,,,,,,
|
||||||
|
lock_35_T,35,T,1063,923,0.8683,NO,0.45,0.9,M,,,,,,,
|
||||||
|
hume_35_F,35,F,1063,873,0.82126,NO,0.43,0.94,M,,,,,,,
|
||||||
|
papy_75_T,75,T,1063,873,0.82126,YES,0.78,0.92,M,,,,,,,
|
||||||
|
sham_55_T,55,T,1063,860,0.80903,NO,0.57,1,M,,,,,,,0.665565
|
||||||
|
list_75_F,75,F,1063,855,0.80433,YES,0.85,0.96,L,,,,,,,
|
||||||
|
cons_55_T,55,T,1063,816,0.76764,NO,0.57,0.9,M,,,,,,,
|
||||||
|
tsun_75_T,75,T,1063,813,0.76482,YES,0.8,0.96,L,,,,,,,
|
||||||
|
pana_35_T,35,T,1063,805,0.75729,NO,0.37,0.65,H,,,,,,,
|
||||||
|
kabu_15_T,15,T,1063,791,0.74412,NO,0.25,0.76,M,,,,,,,
|
||||||
|
gulf_15_F,15,F,1063,773,0.72719,NO,0.16,0.24,H,,,,,,,
|
||||||
|
oedi_35_T,35,T,1063,767,0.72154,NO,0.43,0.88,M,,,,,,,
|
||||||
|
vaud_15_T,15,T,1063,766,0.7206,NO,0.24,0.41,H,,,,,,,
|
||||||
|
mont_35_F,35,F,1063,748,0.70367,NO,0.36,0.78,M,,,,,,,
|
||||||
|
demo_15_F,15,F,1063,727,0.68391,NO,0.19,0.39,H,,,,,,,
|
||||||
|
spee_75_F,75,F,1063,688,0.64722,NO,0.78,0.98,L,,,,,,,
|
||||||
|
dwar_55_F,55,F,1063,663,0.62371,YES,0.64,0.76,L,,,,,,,
|
||||||
|
carb_35_T,35,T,1063,629,0.59172,NO,0.39,0.31,H,,,,,,,
|
||||||
|
bohr_15_T,15,T,1063,626,0.5889,NO,0.16,0.37,H,,,,,,,
|
||||||
|
gang_15_F,15,F,1063,572,0.5381,NO,0.21,0.39,H,,,,,,,
|
||||||
|
vitc_55_F,55,F,1063,556,0.52305,NO,0.63,0.84,L,,,,,,,
|
||||||
|
hert_35_F,35,F,1063,551,0.51834,NO,0.41,0.65,M,,,,,,,
|
||||||
|
pucc_15_F,15,F,1063,543,0.51082,NO,0.15,0.22,H,,,,,,,
|
||||||
|
troy_15_T,15,T,1063,504,0.47413,NO,0.22,0.49,M,,,,,,,
|
||||||
|
moza_55_F,55,F,1063,500,0.47037,NO,0.59,0.71,L,,,,,,,
|
||||||
|
croc_75_F,75,F,1063,433,0.40734,NO,0.76,0.88,L,,,,,,,
|
||||||
|
gees_55_F,55,F,1063,324,0.3048,NO,0.57,0.84,L,,,,,,,
|
||||||
|
lute_35_F,35,F,1063,322,0.30292,NO,0.46,0.67,L,,,,,,,
|
||||||
|
memo_75_F,75,F,1063,265,0.24929,NO,0.83,0.88,L,,,,,,,
|
||||||
|
BIN
eohi1/interaction_DGEN_assumptions.png
Normal file
|
After Width: | Height: | Size: 19 KiB |
BIN
eohi1/interaction_domain_assumptions.png
Normal file
|
After Width: | Height: | Size: 20 KiB |
BIN
eohi1/linearity_plots.pdf
Normal file
875
eohi1/mixed anova - DGEN.r
Normal file
@ -0,0 +1,875 @@
|
|||||||
|
# Mixed ANOVA Analysis for DGEN Variables
|
||||||
|
# EOHI Experiment Data Analysis - DGEN Level Analysis
|
||||||
|
# Variables: pastPref_DGEN, pastPers_DGEN, pastVal_DGEN, pastLife_DGEN
|
||||||
|
# futPref_DGEN, futPers_DGEN, futVal_DGEN, futLife_DGEN
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(tidyverse)
|
||||||
|
library(ez)
|
||||||
|
library(car)
|
||||||
|
library(afex) # For aov_ez (cleaner ANOVA output)
|
||||||
|
library(nortest) # For normality tests
|
||||||
|
library(emmeans) # For post-hoc comparisons
|
||||||
|
library(purrr) # For map functions
|
||||||
|
library(effsize) # For Cohen's d calculations
|
||||||
|
library(effectsize) # For effect size calculations
|
||||||
|
|
||||||
|
# Global options to remove scientific notation
|
||||||
|
options(scipen = 999)
|
||||||
|
|
||||||
|
# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation)
|
||||||
|
options(contrasts = c("contr.sum", "contr.poly"))
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
# Read the data
|
||||||
|
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Verify the specific variables we need
|
||||||
|
required_vars <- c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
|
||||||
|
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN")
|
||||||
|
|
||||||
|
missing_vars <- required_vars[!required_vars %in% colnames(data)]
|
||||||
|
if (length(missing_vars) > 0) {
|
||||||
|
print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Define domain mapping
|
||||||
|
domain_mapping <- data.frame(
|
||||||
|
variable = c("pastPref_DGEN", "pastPers_DGEN", "pastVal_DGEN", "pastLife_DGEN",
|
||||||
|
"futPref_DGEN", "futPers_DGEN", "futVal_DGEN", "futLife_DGEN"),
|
||||||
|
time = c(rep("Past", 4), rep("Future", 4)),
|
||||||
|
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Efficient data pivoting using pivot_longer
|
||||||
|
long_data <- data %>%
|
||||||
|
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||||
|
pivot_longer(
|
||||||
|
cols = all_of(required_vars),
|
||||||
|
names_to = "variable",
|
||||||
|
values_to = "DGEN_SCORE"
|
||||||
|
) %>%
|
||||||
|
left_join(domain_mapping, by = "variable") %>%
|
||||||
|
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||||
|
mutate(
|
||||||
|
TIME = factor(time, levels = c("Past", "Future")),
|
||||||
|
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||||
|
pID = as.factor(pID),
|
||||||
|
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||||
|
) %>%
|
||||||
|
# Select final columns and remove any rows with missing values
|
||||||
|
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, DGEN_SCORE) %>%
|
||||||
|
filter(!is.na(DGEN_SCORE))
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# DESCRIPTIVE STATISTICS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Overall descriptive statistics by TIME and DOMAIN
|
||||||
|
desc_stats <- long_data %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n = n(),
|
||||||
|
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
median = round(median(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
q1 = round(quantile(DGEN_SCORE, 0.25, na.rm = TRUE), 5),
|
||||||
|
q3 = round(quantile(DGEN_SCORE, 0.75, na.rm = TRUE), 5),
|
||||||
|
min = round(min(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
max = round(max(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Descriptive statistics by TIME and DOMAIN:")
|
||||||
|
print(desc_stats)
|
||||||
|
|
||||||
|
# Descriptive statistics by between-subjects factors
|
||||||
|
desc_stats_by_temporal <- long_data %>%
|
||||||
|
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n = n(),
|
||||||
|
mean = round(mean(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
variance = round(var(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
sd = round(sd(DGEN_SCORE, na.rm = TRUE), 5),
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Descriptive statistics by TEMPORAL_DO, TIME, and DOMAIN:")
|
||||||
|
print(desc_stats_by_temporal)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# ASSUMPTION TESTING
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Remove missing values for assumption testing
|
||||||
|
long_data_clean <- long_data[!is.na(long_data$DGEN_SCORE), ]
|
||||||
|
|
||||||
|
# 1. Missing values check
|
||||||
|
missing_summary <- long_data %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n_total = n(),
|
||||||
|
n_missing = sum(is.na(DGEN_SCORE)),
|
||||||
|
pct_missing = round(100 * n_missing / n_total, 2),
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Missing values by TIME and DOMAIN:")
|
||||||
|
print(missing_summary)
|
||||||
|
|
||||||
|
# 2. Outlier detection
|
||||||
|
outlier_summary <- long_data_clean %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n = n(),
|
||||||
|
mean = mean(DGEN_SCORE),
|
||||||
|
sd = sd(DGEN_SCORE),
|
||||||
|
q1 = quantile(DGEN_SCORE, 0.25),
|
||||||
|
q3 = quantile(DGEN_SCORE, 0.75),
|
||||||
|
iqr = q3 - q1,
|
||||||
|
lower_bound = q1 - 1.5 * iqr,
|
||||||
|
upper_bound = q3 + 1.5 * iqr,
|
||||||
|
n_outliers = sum(DGEN_SCORE < lower_bound | DGEN_SCORE > upper_bound),
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Outlier summary (IQR method):")
|
||||||
|
print(outlier_summary)
|
||||||
|
|
||||||
|
# 3. Anderson-Darling normality test (streamlined)
|
||||||
|
normality_results <- long_data_clean %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
n = n(),
|
||||||
|
ad_statistic = ad.test(.data$DGEN_SCORE)$statistic,
|
||||||
|
ad_p_value = ad.test(.data$DGEN_SCORE)$p.value,
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Anderson-Darling normality test results:")
|
||||||
|
# Round only the numeric columns
|
||||||
|
normality_results_rounded <- normality_results %>%
|
||||||
|
mutate(across(where(is.numeric), ~ round(.x, 5)))
|
||||||
|
print(normality_results_rounded)
|
||||||
|
|
||||||
|
# 4. Homogeneity of variance (Levene's test)
|
||||||
|
# Test homogeneity across TIME within each DOMAIN
|
||||||
|
homogeneity_time <- long_data_clean %>%
|
||||||
|
group_by(DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
levene_F = leveneTest(DGEN_SCORE ~ TIME)$`F value`[1],
|
||||||
|
levene_p = leveneTest(DGEN_SCORE ~ TIME)$`Pr(>F)`[1],
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Homogeneity of variance across TIME within each DOMAIN:")
|
||||||
|
print(homogeneity_time)
|
||||||
|
|
||||||
|
# Test homogeneity across DOMAIN within each TIME
|
||||||
|
homogeneity_domain <- long_data_clean %>%
|
||||||
|
group_by(TIME) %>%
|
||||||
|
summarise(
|
||||||
|
levene_F = leveneTest(DGEN_SCORE ~ DOMAIN)$`F value`[1],
|
||||||
|
levene_p = leveneTest(DGEN_SCORE ~ DOMAIN)$`Pr(>F)`[1],
|
||||||
|
.groups = 'drop'
|
||||||
|
)
|
||||||
|
|
||||||
|
print("Homogeneity of variance across DOMAIN within each TIME:")
|
||||||
|
print(homogeneity_domain)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Function to calculate Hartley's F-max ratio
|
||||||
|
calculate_hartley_ratio <- function(variances) {
|
||||||
|
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||||
|
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||||
|
|
||||||
|
print("=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × DOMAIN combination ===")
|
||||||
|
|
||||||
|
observed_temporal_ratios <- long_data_clean %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||||
|
past_var = var(DGEN_SCORE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||||
|
fut_var = var(DGEN_SCORE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||||
|
# Calculate F-max ratio
|
||||||
|
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||||
|
.groups = 'drop'
|
||||||
|
) %>%
|
||||||
|
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||||
|
|
||||||
|
print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# More efficient bootstrap function for Hartley's F-max test
|
||||||
|
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
|
||||||
|
# Get unique groups and their sample sizes
|
||||||
|
groups <- unique(data[[group_var]])
|
||||||
|
|
||||||
|
# Calculate observed variances for each group
|
||||||
|
observed_vars <- data %>%
|
||||||
|
dplyr::group_by(!!rlang::sym(group_var)) %>%
|
||||||
|
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
|
||||||
|
dplyr::pull(var)
|
||||||
|
|
||||||
|
# Handle invalid variances
|
||||||
|
if(any(observed_vars <= 0 | is.na(observed_vars))) {
|
||||||
|
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate observed F-max ratio
|
||||||
|
observed_ratio <- max(observed_vars) / min(observed_vars)
|
||||||
|
|
||||||
|
# Pre-allocate storage for bootstrap ratios
|
||||||
|
bootstrap_ratios <- numeric(n_iter)
|
||||||
|
|
||||||
|
# Get group data once
|
||||||
|
group_data_list <- map(groups, ~ {
|
||||||
|
group_data <- data[data[[group_var]] == .x, response_var]
|
||||||
|
group_data[!is.na(group_data)]
|
||||||
|
})
|
||||||
|
|
||||||
|
# Bootstrap with pre-allocated storage
|
||||||
|
for(i in 1:n_iter) {
|
||||||
|
# Bootstrap sample from each group independently
|
||||||
|
sample_vars <- map_dbl(group_data_list, ~ {
|
||||||
|
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
|
||||||
|
var(bootstrap_sample, na.rm = TRUE)
|
||||||
|
})
|
||||||
|
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Remove invalid ratios
|
||||||
|
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
|
||||||
|
|
||||||
|
if(length(valid_ratios) == 0) {
|
||||||
|
stop("No valid bootstrap ratios generated")
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate critical value (95th percentile)
|
||||||
|
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
|
||||||
|
|
||||||
|
# Return only essential information
|
||||||
|
return(list(
|
||||||
|
observed_ratio = observed_ratio,
|
||||||
|
critical_95 = critical_95,
|
||||||
|
n_valid_iterations = length(valid_ratios)
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||||
|
print("=== HARTLEY'S F-MAX TEST RESULTS ===")
|
||||||
|
set.seed(123) # For reproducibility
|
||||||
|
|
||||||
|
hartley_temporal_results <- long_data_clean %>%
|
||||||
|
group_by(TIME, DOMAIN) %>%
|
||||||
|
summarise(
|
||||||
|
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, DGEN_SCORE), "TEMPORAL_DO", "DGEN_SCORE")),
|
||||||
|
.groups = 'drop'
|
||||||
|
) %>%
|
||||||
|
mutate(
|
||||||
|
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||||
|
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||||
|
significant = observed_ratio > critical_95
|
||||||
|
) %>%
|
||||||
|
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||||
|
|
||||||
|
print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MIXED ANOVA ANALYSIS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Check if design is balanced
|
||||||
|
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
|
||||||
|
if(all(design_balance %in% c(0, 1))) {
|
||||||
|
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
|
||||||
|
} else {
|
||||||
|
print("Warning: Design is unbalanced")
|
||||||
|
print(summary(as.vector(design_balance)))
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# MIXED ANOVA WITH SPHERICITY CORRECTIONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("=== MIXED ANOVA RESULTS (with sphericity corrections) ===")
|
||||||
|
|
||||||
|
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
|
||||||
|
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
|
||||||
|
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
|
||||||
|
mixed_anova_model <- ezANOVA(data = long_data_clean,
|
||||||
|
dv = DGEN_SCORE,
|
||||||
|
wid = pID,
|
||||||
|
between = TEMPORAL_DO,
|
||||||
|
within = .(TIME, DOMAIN),
|
||||||
|
type = 3,
|
||||||
|
detailed = TRUE)
|
||||||
|
|
||||||
|
anova_output <- mixed_anova_model$ANOVA
|
||||||
|
rownames(anova_output) <- NULL # Reset row numbers to be sequential
|
||||||
|
print(anova_output)
|
||||||
|
|
||||||
|
# Show Mauchly's test for sphericity
|
||||||
|
print("Mauchly's Test of Sphericity:")
|
||||||
|
print(mixed_anova_model$Mauchly)
|
||||||
|
|
||||||
|
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
|
||||||
|
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
|
||||||
|
print("Greenhouse-Geisser and Huynh-Feldt Corrections:")
|
||||||
|
print(mixed_anova_model$`Sphericity Corrections`)
|
||||||
|
|
||||||
|
# Extract and display corrected degrees of freedom
|
||||||
|
cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")
|
||||||
|
|
||||||
|
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
|
||||||
|
anova_table <- mixed_anova_model$ANOVA
|
||||||
|
|
||||||
|
corrected_df <- data.frame(
|
||||||
|
Effect = sphericity_corr$Effect,
|
||||||
|
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||||
|
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||||
|
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||||
|
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||||
|
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||||
|
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||||
|
GG_epsilon = sphericity_corr$GGe,
|
||||||
|
HF_epsilon = sphericity_corr$HFe
|
||||||
|
)
|
||||||
|
|
||||||
|
print(corrected_df)
|
||||||
|
|
||||||
|
cat("\n=== CORRECTED F-TESTS ===\n")
|
||||||
|
for(i in seq_len(nrow(corrected_df))) {
|
||||||
|
effect <- corrected_df$Effect[i]
|
||||||
|
f_value <- anova_table$F[match(effect, anova_table$Effect)]
|
||||||
|
|
||||||
|
cat(sprintf("\n%s:\n", effect))
|
||||||
|
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
|
||||||
|
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
|
||||||
|
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||||
|
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
|
||||||
|
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||||
|
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# EFFECT SIZES (GENERALIZED ETA SQUARED)
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")
|
||||||
|
|
||||||
|
# Extract generalized eta squared from ezANOVA (already calculated)
|
||||||
|
effect_sizes <- mixed_anova_model$ANOVA[, c("Effect", "ges")]
|
||||||
|
effect_sizes$ges <- round(effect_sizes$ges, 5)
|
||||||
|
print("Generalized Eta Squared:")
|
||||||
|
print(effect_sizes)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# POST-HOC COMPARISONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Post-hoc comparisons using emmeans
|
||||||
|
print("\n=== POST-HOC COMPARISONS ===")
|
||||||
|
|
||||||
|
# Create aov model for emmeans (emmeans requires aov object, not ezANOVA output)
|
||||||
|
aov_model <- aov(DGEN_SCORE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
|
||||||
|
data = long_data_clean)
|
||||||
|
|
||||||
|
# Main effect of TIME
|
||||||
|
print("Main Effect of TIME:")
|
||||||
|
time_emmeans <- emmeans(aov_model, ~ TIME)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(time_emmeans)
|
||||||
|
print("\nPairwise Contrasts:")
|
||||||
|
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
|
||||||
|
print(time_contrasts)
|
||||||
|
|
||||||
|
# Main effect of DOMAIN
|
||||||
|
print("\nMain Effect of DOMAIN:")
|
||||||
|
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(domain_emmeans)
|
||||||
|
print("\nPairwise Contrasts:")
|
||||||
|
domain_contrasts <- pairs(domain_emmeans, adjust = "bonferroni")
|
||||||
|
print(domain_contrasts)
|
||||||
|
|
||||||
|
# Main effect of TEMPORAL_DO
|
||||||
|
print("\nMain Effect of TEMPORAL_DO:")
|
||||||
|
temporal_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO)
|
||||||
|
temporal_contrasts <- pairs(temporal_emmeans, adjust = "bonferroni")
|
||||||
|
print(temporal_contrasts)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# INTERACTION EXPLORATIONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# TEMPORAL_DO × TIME Interaction
|
||||||
|
print("\n=== TEMPORAL_DO × TIME INTERACTION ===")
|
||||||
|
temporal_time_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(temporal_time_emmeans)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TIME within each TEMPORAL_DO:")
|
||||||
|
temporal_time_simple <- pairs(temporal_time_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
|
||||||
|
print(temporal_time_simple)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TEMPORAL_DO within each TIME:")
|
||||||
|
temporal_time_simple2 <- pairs(temporal_time_emmeans, by = "TIME", adjust = "bonferroni")
|
||||||
|
print(temporal_time_simple2)
|
||||||
|
|
||||||
|
# TIME × DOMAIN Interaction
|
||||||
|
print("\n=== TIME × DOMAIN INTERACTION ===")
|
||||||
|
time_domain_emmeans <- emmeans(aov_model, ~ TIME * DOMAIN)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(time_domain_emmeans)
|
||||||
|
|
||||||
|
print("\nSimple Effects of DOMAIN within each TIME:")
|
||||||
|
time_domain_simple <- pairs(time_domain_emmeans, by = "TIME", adjust = "bonferroni")
|
||||||
|
print(time_domain_simple)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TIME within each DOMAIN:")
|
||||||
|
time_domain_simple2 <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
|
||||||
|
print(time_domain_simple2)
|
||||||
|
|
||||||
|
# TEMPORAL_DO × DOMAIN Interaction
|
||||||
|
print("\n=== TEMPORAL_DO × DOMAIN INTERACTION ===")
|
||||||
|
temporal_domain_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(temporal_domain_emmeans)
|
||||||
|
|
||||||
|
print("\nSimple Effects of DOMAIN within each TEMPORAL_DO:")
|
||||||
|
temporal_domain_simple <- pairs(temporal_domain_emmeans, by = "TEMPORAL_DO", adjust = "bonferroni")
|
||||||
|
print(temporal_domain_simple)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TEMPORAL_DO within each DOMAIN:")
|
||||||
|
temporal_domain_simple2 <- pairs(temporal_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
|
||||||
|
print(temporal_domain_simple2)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# THREE-WAY INTERACTION ANALYSIS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== THREE-WAY INTERACTION ANALYSIS ===")
|
||||||
|
three_way_emmeans <- emmeans(aov_model, ~ TEMPORAL_DO * TIME * DOMAIN)
|
||||||
|
print("Estimated Marginal Means:")
|
||||||
|
print(three_way_emmeans)
|
||||||
|
|
||||||
|
print("\nSimple Effects of TIME within each TEMPORAL_DO × DOMAIN combination:")
|
||||||
|
three_way_contrasts <- pairs(three_way_emmeans, by = c("TEMPORAL_DO", "DOMAIN"), adjust = "bonferroni")
|
||||||
|
print(three_way_contrasts)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Cohen's d calculations (library already loaded)
|
||||||
|
|
||||||
|
print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")
|
||||||
|
|
||||||
|
# Function to calculate Cohen's d for pairwise comparisons
|
||||||
|
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
|
||||||
|
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
|
||||||
|
|
||||||
|
if(nrow(significant_pairs) > 0) {
|
||||||
|
cat("Significant pairwise comparisons (p < 0.05):\n")
|
||||||
|
print(significant_pairs)
|
||||||
|
|
||||||
|
cat("\nCohen's d calculated from raw data:\n")
|
||||||
|
|
||||||
|
for(i in seq_len(nrow(significant_pairs))) {
|
||||||
|
comparison <- significant_pairs[i, ]
|
||||||
|
contrast_name <- as.character(comparison$contrast)
|
||||||
|
|
||||||
|
# Parse the contrast
|
||||||
|
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
|
||||||
|
if(length(contrast_parts) == 2) {
|
||||||
|
level1 <- trimws(contrast_parts[1])
|
||||||
|
level2 <- trimws(contrast_parts[2])
|
||||||
|
|
||||||
|
# Get raw data for both conditions
|
||||||
|
if(group2_var %in% colnames(comparison)) {
|
||||||
|
group2_level <- as.character(comparison[[group2_var]])
|
||||||
|
data1 <- data[[response_var]][
|
||||||
|
data[[group1_var]] == level1 &
|
||||||
|
data[[group2_var]] == group2_level]
|
||||||
|
|
||||||
|
data2 <- data[[response_var]][
|
||||||
|
data[[group1_var]] == level2 &
|
||||||
|
data[[group2_var]] == group2_level]
|
||||||
|
} else {
|
||||||
|
data1 <- data[[response_var]][data[[group1_var]] == level1]
|
||||||
|
data2 <- data[[response_var]][data[[group1_var]] == level2]
|
||||||
|
}
|
||||||
|
|
||||||
|
if(length(data1) > 0 && length(data2) > 0) {
|
||||||
|
# Calculate Cohen's d using effsize package
|
||||||
|
cohens_d_result <- cohen.d(data1, data2)
|
||||||
|
|
||||||
|
cat(sprintf("Comparison: %s", contrast_name))
|
||||||
|
if(group2_var %in% colnames(comparison)) {
|
||||||
|
cat(sprintf(" | %s", group2_level))
|
||||||
|
}
|
||||||
|
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
|
||||||
|
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
|
||||||
|
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
|
||||||
|
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
|
||||||
|
cat("\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cat("No significant pairwise comparisons found.\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# 1. TEMPORAL_DO × TIME INTERACTION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== COHEN'S D FOR TEMPORAL_DO × TIME INTERACTION ===")
|
||||||
|
|
||||||
|
# Get simple effects of TIME within each TEMPORAL_DO
|
||||||
|
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
|
||||||
|
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# Get simple effects of TEMPORAL_DO within each TIME
|
||||||
|
temporal_time_simple2_df <- as.data.frame(temporal_time_simple2)
|
||||||
|
calculate_cohens_d_for_pairs(temporal_time_simple2_df, long_data_clean, "TEMPORAL_DO", "TIME", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# 2. TIME × DOMAIN INTERACTION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")
|
||||||
|
|
||||||
|
# Get simple effects of TIME within each DOMAIN
|
||||||
|
time_domain_simple2_df <- as.data.frame(time_domain_simple2)
|
||||||
|
calculate_cohens_d_for_pairs(time_domain_simple2_df, long_data_clean, "TIME", "DOMAIN", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# Get simple effects of DOMAIN within each TIME
|
||||||
|
time_domain_simple_df <- as.data.frame(time_domain_simple)
|
||||||
|
calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# 3. TEMPORAL_DO × DOMAIN INTERACTION
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
print("\n=== COHEN'S D FOR TEMPORAL_DO × DOMAIN INTERACTION ===")
|
||||||
|
|
||||||
|
# Get simple effects of TEMPORAL_DO within each DOMAIN
|
||||||
|
temporal_domain_simple2_df <- as.data.frame(temporal_domain_simple2)
|
||||||
|
calculate_cohens_d_for_pairs(temporal_domain_simple2_df, long_data_clean, "TEMPORAL_DO", "DOMAIN", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# Get simple effects of DOMAIN within each TEMPORAL_DO
|
||||||
|
temporal_domain_simple_df <- as.data.frame(temporal_domain_simple)
|
||||||
|
calculate_cohens_d_for_pairs(temporal_domain_simple_df, long_data_clean, "DOMAIN", "TEMPORAL_DO", "DGEN_SCORE")
|
||||||
|
|
||||||
|
# =============================================================================
# 4. THREE-WAY INTERACTION COHEN'S D
# =============================================================================

# FIX: use cat() so the leading "\n" is an actual newline; print() would show
# the escape sequence literally (and wrap the string in quotes).
cat("\n=== COHEN'S D FOR THREE-WAY INTERACTION ===\n")

# Get pairwise comparisons for the three-way interaction
# (three_way_contrasts is the emmeans pairs() result computed earlier)
three_way_contrasts_df <- as.data.frame(three_way_contrasts)
print("The pairwise comparisons show the TIME effects within each TEMPORAL_DO × DOMAIN combination:")
print(three_way_contrasts_df)

# Calculate Cohen's d for significant three-way interaction effects
cat("\nCohen's d calculations for significant TIME effects within each TEMPORAL_DO × DOMAIN combination:\n")

# Extract significant comparisons (p < 0.05)
# FIX: guard against NA p-values, which would otherwise inject all-NA rows
# into the bracket subset and break the loop below.
significant_three_way <- three_way_contrasts_df[
  !is.na(three_way_contrasts_df$p.value) & three_way_contrasts_df$p.value < 0.05, ]

if(nrow(significant_three_way) > 0) {
  for(i in seq_len(nrow(significant_three_way))) {
    comparison <- significant_three_way[i, ]

    # Extract the grouping variables identifying this cell
    temporal_do_level <- as.character(comparison$TEMPORAL_DO)
    domain_level <- as.character(comparison$DOMAIN)

    # Get data for Past and Future within this TEMPORAL_DO × DOMAIN combination
    past_data <- long_data_clean$DGEN_SCORE[
      long_data_clean$TEMPORAL_DO == temporal_do_level &
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Past"
    ]

    future_data <- long_data_clean$DGEN_SCORE[
      long_data_clean$TEMPORAL_DO == temporal_do_level &
      long_data_clean$DOMAIN == domain_level &
      long_data_clean$TIME == "Future"
    ]

    if(length(past_data) > 0 && length(future_data) > 0) {
      # Calculate Cohen's d using effsize package
      # NOTE(review): treats Past/Future as independent samples even though
      # TIME is within-subjects -- confirm this is the intended d variant.
      cohens_d_result <- cohen.d(past_data, future_data)

      cat(sprintf("TEMPORAL_DO = %s, DOMAIN = %s:\n", temporal_do_level, domain_level))
      cat(sprintf(" Past vs Future comparison\n"))
      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_data), length(future_data)))
      cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
      cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
      cat(sprintf(" p-value: %.5f\n", comparison$p.value))
      cat(sprintf(" Estimated difference: %.5f\n", comparison$estimate))
      cat("\n")
    }
  }
} else {
  cat("No significant TIME effects found within any TEMPORAL_DO × DOMAIN combination.\n")
}
|
||||||
|
|
||||||
|
# =============================================================================
# INTERACTION PLOTS
# =============================================================================

print("=== INTERACTION PLOTS ===")

# Define color palette for DOMAIN (4 levels)
domain_colors <- c("Preferences" = "#648FFF", "Personality" = "#DC267F",
                   "Values" = "#FFB000", "Life" = "#FE6100")

# TEMPORAL_DO × DOMAIN INTERACTION PLOT
# Create estimated marginal means for TEMPORAL_DO × DOMAIN
# (aov_model is the repeated-measures aov fitted earlier in this file)
emm_temporal_domain <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)

# Prepare emmeans data frame: drop rows with incomplete CIs, rename the
# emmeans columns to plotting names, and pin factor level order so axis and
# legend ordering is deterministic.
emmeans_temporal_domain <- emm_temporal_domain %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
  )

# Prepare raw data for plotting (one DGEN_SCORE row per participant × cell)
iPlot_temporal_domain <- long_data_clean %>%
  dplyr::select(pID, TEMPORAL_DO, DOMAIN, DGEN_SCORE) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life"))
  )

# Create TEMPORAL_DO × DOMAIN interaction plot - clean line plot with distribution
# Convert to numeric x-axis and add position offsets for dodging.
# Manual dodge: the 4 DOMAIN levels are spread symmetrically around each
# TEMPORAL_DO position via offsets (k - 2.5) * dodge_width/4 for k = 1..4.
dodge_width <- 0.6

iPlot_temporal_domain <- iPlot_temporal_domain %>%
  mutate(
    x_pos = as.numeric(TEMPORAL_DO),
    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
    x_dodged = x_pos + domain_offset
  )

# Apply the identical dodge to the emmeans layer so points and error bars
# line up with their violins.
emmeans_temporal_domain <- emmeans_temporal_domain %>%
  mutate(
    x_pos = as.numeric(TEMPORAL_DO),
    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
    x_dodged = x_pos + domain_offset
  )

interaction_plot_temporal_domain <- ggplot() +
  # Distribution layer - violins (completely separated)
  geom_violin(
    data = iPlot_temporal_domain,
    aes(x = x_dodged, y = DGEN_SCORE, fill = DOMAIN, group = interaction(x_pos, DOMAIN)),
    alpha = 0.4,
    color = NA,
    trim = FALSE,
    scale = "width",
    width = dodge_width / 4
  ) +
  # Emmeans error bars (model CIs, not raw-data SEs)
  geom_errorbar(
    data = emmeans_temporal_domain,
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper),
    width = 0.08,
    linewidth = 0.8,
    color = "black"
  ) +
  # Emmeans points (fillable shapes 21-24; black outline via color)
  geom_point(
    data = emmeans_temporal_domain,
    aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
    size = 4,
    stroke = 1,
    color = "black"
  ) +
  labs(
    x = "Order",
    y = "Mean absolute difference from present",
    title = "TEMPORAL_DO × DOMAIN Interaction"
  ) +
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.4, 2.6)
  ) +
  scale_y_continuous(
    limits = c(0, 10),
    breaks = seq(0, 10, 2)
  ) +
  scale_color_manual(name = "Domain", values = domain_colors) +
  scale_fill_manual(name = "Domain", values = domain_colors) +
  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_temporal_domain)
|
||||||
|
|
||||||
|
# =============================================================================
# EMMEANS-ONLY PLOT: TEMPORAL_DO × DOMAIN INTERACTION
# =============================================================================

# Create fresh emmeans data for emmeans-only plot (same grid as the violin
# plot above, rebuilt so this section does not depend on its data prep;
# dodge_width is still reused from the previous section)
emm_temporal_domain_simple <- emmeans(aov_model, ~ TEMPORAL_DO * DOMAIN)

emmeans_temporal_domain_simple <- emm_temporal_domain_simple %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    # Manual dodge: spread the 4 domains symmetrically around each x position
    x_pos = as.numeric(TEMPORAL_DO),
    domain_offset = (as.numeric(DOMAIN) - 2.5) * (dodge_width / 4),
    x_dodged = x_pos + domain_offset
  )

# Create emmeans-only plot (model means + CIs, no raw-data layer)
interaction_plot_emmeans_only <- ggplot(emmeans_temporal_domain_simple) +
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = DOMAIN),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ) +
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = DOMAIN, shape = DOMAIN),
    size = 5,
    stroke = 1.2,
    color = "black"
  ) +
  labs(
    x = "Order",
    y = "Absolute difference from the present",
    title = "TEMPORAL_DO × DOMAIN Interaction (Estimated Marginal Means)"
  ) +
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.4, 2.6)
  ) +
  # Zoomed y-range; breaks outside the limits are dropped by ggplot
  scale_y_continuous(
    limits = c(3, 6),
    breaks = seq(0, 10, 1)
  ) +
  scale_color_manual(name = "Domain", values = domain_colors) +
  scale_fill_manual(name = "Domain", values = domain_colors) +
  scale_shape_manual(name = "Domain", values = c(21, 22, 23, 24)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(interaction_plot_emmeans_only)
|
||||||
|
|
||||||
|
# =============================================================================
# MAIN EFFECT PLOT: TIME (Emmeans + Error Bars Only)
# =============================================================================

# Prepare emmeans data frame for TIME main effect
# (time_emmeans is the emmeans(aov_model, ~ TIME) object computed earlier)
time_main_emm_df <- time_emmeans %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TIME = factor(TIME, levels = c("Past", "Future"))
  )

# Define color palette for TIME
time_colors <- c("Past" = "#648FFF", "Future" = "#DC267F")

# Create TIME main-effect plot (style aligned with existing emmeans-only plot)
time_main_plot <- ggplot(time_main_emm_df) +
  geom_errorbar(
    aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.15,
    linewidth = 1,
    alpha = 0.8
  ) +
  # Fillable shapes (21, 22) with black outline via color
  geom_point(
    aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ) +
  labs(
    x = "Time",
    y = "Absolute difference from the present",
    title = "Main Effect of TIME (Estimated Marginal Means)"
  ) +
  scale_color_manual(name = "Temporal Direction", values = time_colors) +
  scale_fill_manual(name = "Temporal Direction", values = time_colors) +
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)) +
  theme_minimal(base_size = 13) +
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )

print(time_main_plot)
|
||||||
769
eohi1/mixed anova - domain means.r
Normal file
@ -0,0 +1,769 @@
|
|||||||
|
# Mixed ANOVA Analysis for Domain Means
# EOHI Experiment Data Analysis - Domain Level Analysis
# Variables: NPast_mean_pref, NPast_mean_pers, NPast_mean_val, NPast_mean_life
#            NFut_mean_pref, NFut_mean_pers, NFut_mean_val, NFut_mean_life

library(tidyverse)
library(ez)         # ezANOVA for the mixed ANOVA
library(car)        # leveneTest
library(nortest)    # For normality tests (ad.test)
library(emmeans)    # For post-hoc comparisons
library(purrr)      # For map functions (also attached by tidyverse)
library(effsize)    # For Cohen's d calculations (cohen.d)
library(effectsize) # For effect size calculations
library(ggplot2)    # For plotting (also attached by tidyverse)

# Print numbers in fixed notation rather than scientific
options(scipen = 999)

# Sum-to-zero contrasts so the Type III sums of squares requested below are
# meaningful
options(contrasts = c("contr.sum", "contr.poly"))

# NOTE(review): machine-specific absolute path -- adjust when running elsewhere
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read the data (wide format: one row per participant)
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# The eight wide-format columns to be reshaped: 2 TIME × 4 DOMAIN cells
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Define domain mapping: lookup table translating each wide-format column name
# into its TIME (Past/Future) and DOMAIN (Preferences/Personality/Values/Life)
domain_mapping <- data.frame(
  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)

# Reshape wide -> long: one row per participant × TIME × DOMAIN cell
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(domain_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Descriptive statistics
# ---------------------------------------------------------------------------

# Overall descriptive statistics by TIME and DOMAIN. Summaries are computed
# raw first, then every numeric summary (except n) is rounded to 5 decimals
# in a single across() pass.
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE, na.rm = TRUE),
    variance = var(MEAN_DIFFERENCE, na.rm = TRUE),
    sd = sd(MEAN_DIFFERENCE, na.rm = TRUE),
    median = median(MEAN_DIFFERENCE, na.rm = TRUE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE),
    min = min(MEAN_DIFFERENCE, na.rm = TRUE),
    max = max(MEAN_DIFFERENCE, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(across(c(mean, variance, sd, median, q1, q3, min, max), ~ round(.x, 5)))

print(desc_stats)

# Descriptive statistics additionally split by the between-subjects factor
# TEMPORAL_DO
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE, na.rm = TRUE),
    variance = var(MEAN_DIFFERENCE, na.rm = TRUE),
    sd = sd(MEAN_DIFFERENCE, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(across(c(mean, variance, sd), ~ round(.x, 5)))

print(desc_stats_by_temporal)
|
||||||
|
|
||||||
|
# ASSUMPTION TESTING

# 1. Missing values check: per-cell NA counts for the response.
# long_data was already filtered with !is.na(MEAN_DIFFERENCE), so every
# n_missing is expected to be 0 -- this is a sanity check.
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    .groups = "drop"
  ) %>%
  mutate(pct_missing = round(100 * n_missing / n_total, 2))

print(missing_summary)

# Create clean dataset (long_data is already filtered for NA values)
long_data_clean <- long_data
|
||||||
|
|
||||||
|
# 2. Outlier detection via Tukey's 1.5 × IQR fences, per TIME × DOMAIN cell.
# Later summarise() expressions deliberately reference earlier ones
# (q1/q3 -> iqr -> bounds -> n_outliers), which dplyr::summarise supports.
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    median = median(MEAN_DIFFERENCE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print(outlier_summary)
|
||||||
|
|
||||||
|
# 3. Anderson-Darling normality test per TIME × DOMAIN cell.
# FIX: run ad.test() once per cell and unpack both values from the single
# htest result -- the original called it twice per cell (once for the
# statistic, once for the p-value), doubling the work.
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad_test = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad_test, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad_test, ~ .x$p.value)
  ) %>%
  select(-ad_test)

print(normality_results)
|
||||||
|
|
||||||
|
# 4. Homogeneity of variance (Levene's test)
# FIX: run leveneTest() once per group and unpack the F statistic and p-value
# from the single result -- the original called it twice per group.

# Test homogeneity across TIME within each DOMAIN
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-levene)

print(homogeneity_time)

# Test homogeneity across DOMAIN within each TIME
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ DOMAIN)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-levene)

print(homogeneity_domain)
|
||||||
|
|
||||||
|
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES

# Hartley's F-max statistic: ratio of the largest to the smallest of the
# supplied group variances (NAs ignored). Values near 1 indicate homogeneity.
# NOTE(review): not referenced by the pipelines below, which compute the
# ratio inline -- presumably kept for interactive use; confirm.
calculate_hartley_ratio <- function(variances) {
  rng <- range(variances, na.rm = TRUE)
  rng[2] / rng[1]
}
|
||||||
|
|
||||||
|
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)

# Sanity check of the between-subjects grouping variable and its cell sizes
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio (larger variance over smaller)
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# More efficient bootstrap function for Hartley's F-max test
#
# Empirically derives a critical value for Hartley's F-max (max/min group
# variance ratio) by resampling each group independently with replacement.
#
# Args:
#   data: data frame holding the grouping and response columns
#   group_var: name (string) of the grouping column
#   response_var: name (string) of the numeric response column
#   n_iter: number of bootstrap iterations (default 1000)
#
# Returns a list with:
#   observed_ratio - F-max ratio of the observed group variances
#   critical_95 - 95th percentile of the bootstrap ratio distribution
#   n_valid_iterations - number of finite, non-NA bootstrap ratios retained
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get unique groups and their sample sizes
  groups <- unique(data[[group_var]])

  # Calculate observed variances for each group
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Handle invalid variances: zero/NA would break the ratio, so floor them
  # at a tiny positive value
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }

  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Get group data once (NA responses dropped) so the loop only resamples.
  # NOTE(review): single-bracket extraction `data[rows, response_var]` drops
  # to a vector for a base data.frame but stays a one-column tibble for a
  # tibble input (e.g. from pick()) -- verify callers pass the shape sample()
  # expects.
  group_data_list <- map(groups, ~ {
    group_data <- data[data[[group_var]] == .x, response_var]
    group_data[!is.na(group_data)]
  })

  # Bootstrap with pre-allocated storage
  for(i in 1:n_iter) {
    # Bootstrap sample from each group independently
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Remove invalid ratios (Inf/NaN from degenerate resamples)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Calculate critical value (95th percentile)
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||||
|
|
||||||
|
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility of the bootstrap resampling

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # One bootstrap run per cell; pick() hands only the two needed columns
    # to the helper
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Heterogeneous when the observed ratio exceeds the bootstrap 95th
    # percentile
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, DOMAIN, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# MIXED ANOVA ANALYSIS

# Number of rows with no missing values anywhere (sanity check before ezANOVA)
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)

# Balance check: in a fully crossed repeated-measures design every
# participant contributes exactly one observation per TIME × DOMAIN cell,
# so every count in this 3-way table must be 0 or 1.
design_balance <- with(long_data_clean, table(pID, TIME, DOMAIN))
is_balanced <- all(design_balance %in% c(0, 1))

if (is_balanced) {
  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
  print("Warning: Design is unbalanced")
  print(summary(as.vector(design_balance)))
}
|
||||||
|
|
||||||
|
# MIXED ANOVA WITH SPHERICITY CORRECTIONS

# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(data = long_data_clean,
                             dv = MEAN_DIFFERENCE,
                             wid = pID,
                             between = TEMPORAL_DO,
                             within = .(TIME, DOMAIN),
                             type = 3,
                             detailed = TRUE)

anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)

# Show Mauchly's test for sphericity
print(mixed_anova_model$Mauchly)

# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA

  # FIX: compute the Effect -> ANOVA-row lookup once instead of repeating the
  # same match() call for every column of the data frame below.
  effect_rows <- match(sphericity_corr$Effect, anova_table$Effect)

  # Corrected dfs are the original dfs scaled by the epsilon estimates
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[effect_rows],
    Original_DFd = anova_table$DFd[effect_rows],
    GG_DFn = anova_table$DFn[effect_rows] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[effect_rows] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[effect_rows] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[effect_rows] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]

      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }

  # Within-subjects effects (sphericity corrections where applicable)

  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }

  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }

  # Interactions with sphericity corrections: report the uncorrected, the
  # GG-corrected, and the HF-corrected F test for every effect that appears
  # in the corrections table
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # FIX: print() would display the "\n" escape literally (and quote the
  # string); cat() emits a real blank line before the note.
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
|
||||||
|
|
||||||
|
# COHEN'S D FOR MAIN EFFECTS

# Create aov model for emmeans. The univariate repeated-measures error
# structure Error(pID/(TIME * DOMAIN)) mirrors the ezANOVA design above.
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)

# Main Effect of TIME: estimated marginal means and the Past-Future contrast
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)

# FIX: adjust method spelled lowercase "bonferroni" for consistency with
# every other pairs() call in this file.
time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")

time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)

# Calculate Cohen's d for TIME main effect from the raw scores pooled across
# DOMAIN and TEMPORAL_DO
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  # NOTE(review): cohen.d() treats Past/Future as independent samples even
  # though TIME is within-subjects -- confirm this is the intended d variant.
  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
|
||||||
|
|
||||||
|
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
# All pairwise domain contrasts, Bonferroni-adjusted
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)

# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    # emmeans labels contrasts as "LevelA - LevelB"; split to recover levels
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])

      # Raw scores for the two domains (pooled across TIME and TEMPORAL_DO)
      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]

      if(length(data1) > 0 && length(data2) > 0) {
        domain_cohens_d <- cohen.d(data1, data2)
        cat(sprintf("Comparison: %s\n", contrast_name))
        cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
        cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
        cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
        cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
        cat("\n")
      }
    }
  }
}
|
||||||
|
|
||||||
|
# SIMPLE EFFECTS AND PAIRWISE COMPARISONS FOR INTERACTIONS

# 1. TEMPORAL_DO × TIME INTERACTION - Simple effects of TIME within each TEMPORAL_DO
# (Past vs Future contrast computed separately for the 01PAST and 02FUT groups)
temporal_time_emmeans <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
temporal_time_simple <- pairs(temporal_time_emmeans, adjust = "bonferroni")
print(temporal_time_simple)

# 2. TIME × DOMAIN INTERACTION - Simple effects of DOMAIN within each TIME
# (all pairwise DOMAIN contrasts, separately for Past and Future)
time_domain_emmeans <- emmeans(aov_model, ~ DOMAIN | TIME)
time_domain_simple <- pairs(time_domain_emmeans, adjust = "bonferroni")
print(time_domain_simple)
|
||||||
|
|
||||||
|
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS

# Function to calculate Cohen's d for pairwise comparisons
#
# For every row of `pairs_df` with p < .05, parses the emmeans contrast label
# ("LevelA - LevelB"), pulls the matching raw scores out of `data`, and prints
# an effsize::cohen.d summary. Results are printed, not returned.
#
# Args:
#   pairs_df: data.frame of an emmeans pairs() result (needs at least the
#             `contrast` and `p.value` columns)
#   data: long-format data frame to compute d from
#   group1_var: column whose levels appear in the contrast label
#   group2_var: conditioning ("by") column; when pairs_df has no such column
#               the comparison is treated as unconditional
#   response_var: name of the numeric response column
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  # NOTE(review): NA p-values would survive this bracket subset as all-NA
  # rows -- confirm upstream p-values are never NA.
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label "LevelA - LevelB"
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions, restricted to the conditioning
        # level when the pairs table carries one
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
|
||||||
|
|
||||||
|
# 1. TEMPORAL_DO × TIME INTERACTION

# Cohen's d for the significant TIME contrasts within each TEMPORAL_DO level
temporal_time_simple_df <- as.data.frame(temporal_time_simple)
calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")

# 2. TIME × DOMAIN INTERACTION

# Past vs Future contrasts, computed separately for each domain
time_domain_simple_by_domain <- pairs(time_domain_emmeans, by = "DOMAIN", adjust = "bonferroni")
time_domain_simple_df <- as.data.frame(time_domain_simple_by_domain)
print("=== TIME × DOMAIN INTERACTION: Simple Effects of TIME within each DOMAIN ===")
print("Past vs Future contrasts for each domain:")
print(time_domain_simple_df)

# Cohen's d for the Past vs Future contrast inside every domain
print("\n=== COHEN'S D FOR TIME CONTRASTS WITHIN EACH DOMAIN ===")
significant_time_domain <- time_domain_simple_df[time_domain_simple_df$p.value < 0.05, ]

if (nrow(significant_time_domain) > 0) {
  print("Significant Past vs Future contrasts within domains (p < 0.05):")
  print(significant_time_domain)

  print("\nCohen's d calculations for Past vs Future within each domain:")

  # Note: the loop intentionally reports every domain (not just the
  # significant ones); significance only gates whether this section runs.
  for (row_idx in seq_len(nrow(time_domain_simple_df))) {
    contrast_row <- time_domain_simple_df[row_idx, ]
    domain_level <- as.character(contrast_row$DOMAIN)

    # Raw observations for each TIME level within this domain
    past_vals <- with(long_data_clean,
                      MEAN_DIFFERENCE[DOMAIN == domain_level & TIME == "Past"])
    future_vals <- with(long_data_clean,
                        MEAN_DIFFERENCE[DOMAIN == domain_level & TIME == "Future"])

    if (length(past_vals) > 0 && length(future_vals) > 0) {
      # effsize::cohen.d gives the standardized difference and a label
      d_res <- cohen.d(past_vals, future_vals)

      cat(sprintf("Domain: %s\n", domain_level))
      cat(sprintf(" Past vs Future comparison\n"))
      cat(sprintf(" n_Past = %d, n_Future = %d\n", length(past_vals), length(future_vals)))
      cat(sprintf(" Cohen's d: %.5f\n", d_res$estimate))
      cat(sprintf(" Effect size interpretation: %s\n", d_res$magnitude))
      cat(sprintf(" p-value: %.5f\n", contrast_row$p.value))
      cat(sprintf(" Estimated difference: %.5f\n", contrast_row$estimate))
      cat("\n")
    }
  }
} else {
  cat("No significant Past vs Future contrasts found within any domain.\n")
}
|
||||||
|
|
||||||
|
# INTERACTION PLOTS

# Shared color palettes (IBM colorblind-safe hex values)
time_colors <- setNames(c("#648FFF", "#DC267F"), c("Past", "Future"))
domain_colors <- setNames(c("#648FFF", "#DC267F", "#FFB000", "#FE6100"),
                          c("Preferences", "Personality", "Values", "Life"))

# Canonical TIME factor order used by all plots below
time_levels <- c("Past", "Future")


# ============================================================
# PLOT 3: TEMPORAL_DO × TIME INTERACTION (Emmeans only)
# ============================================================

# Fresh marginal means over the TEMPORAL_DO × TIME grid
emm_temporal_time_plot3 <- emmeans(aov_model, ~ TEMPORAL_DO * TIME)

# Tidy the emmeans grid, rename the CI columns, and compute manually-dodged
# x positions (TIME levels offset ±0.15 around each TEMPORAL_DO position).
emmeans_temporal_time_simple <- emm_temporal_time_plot3 %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL), !is.na(upper.CL), !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TEMPORAL_DO = factor(TEMPORAL_DO, levels = c("01PAST", "02FUT")),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(TEMPORAL_DO),
    time_offset = (as.numeric(TIME) - 1.5) * 0.3,
    x_dodged = x_pos + time_offset
  )
|
||||||
|
|
||||||
|
# Emmeans-only interaction plot: error bars + dodged points per TIME level.
# Layers are collected in a list first, then added to the base ggplot.
plot3_layers <- list(
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ),
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ),
  labs(
    x = "Order",
    y = "Mean absolute difference from present",
    title = "TEMPORAL_DO × TIME Interaction (Estimated Marginal Means)"
  ),
  scale_x_continuous(
    breaks = c(1, 2),
    labels = c("Past First", "Future First"),
    limits = c(0.5, 2.5)
  ),
  scale_color_manual(name = "Temporal Direction", values = time_colors),
  scale_fill_manual(name = "Temporal Direction", values = time_colors),
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)),
  theme_minimal(base_size = 13),
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )
)

interaction_plot_emmeans_only <- ggplot(emmeans_temporal_time_simple) + plot3_layers

print(interaction_plot_emmeans_only)
|
||||||
|
|
||||||
|
# ============================================================
# PLOT 4: TIME × DOMAIN INTERACTION (TIME on y-axis, DOMAIN on x-axis)
# ============================================================

# Marginal means over the TIME × DOMAIN grid
emm_time_domain_plot4 <- emmeans(aov_model, ~ TIME * DOMAIN)

# Shared dodge width and domain ordering for this plot
dodge_width_plot4 <- 0.2
domain_levels_plot4 <- c("Preferences", "Personality", "Values", "Life")

# Tidy emmeans grid with CI columns renamed and dodged x positions computed
emmeans_time_domain_plot4 <- emm_time_domain_plot4 %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL), !is.na(upper.CL), !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = domain_levels_plot4),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(DOMAIN),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
    x_dodged = x_pos + time_offset
  )

# Raw per-participant data with matching dodged positions.
# NOTE(review): not referenced by the emmeans-only plot below — presumably
# kept for a raw-data overlay elsewhere; confirm before removing.
iPlot_plot4 <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = domain_levels_plot4),
    TIME = factor(TIME, levels = time_levels),
    x_pos = as.numeric(DOMAIN),
    time_offset = (as.numeric(TIME) - 1.5) * dodge_width_plot4,
    x_dodged = x_pos + time_offset
  )
|
||||||
|
|
||||||
|
# TIME × DOMAIN interaction plot (DOMAIN on x-axis, TIME as dodged groups),
# estimated marginal means only. Layers assembled as a list, then added.
plot4_layers <- list(
  geom_errorbar(
    aes(x = x_dodged, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.1,
    linewidth = 1,
    alpha = 0.8
  ),
  geom_point(
    aes(x = x_dodged, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ),
  labs(
    x = "Domain",
    y = "Mean absolute difference from present",
    title = "TIME × DOMAIN Interaction (Domain on x-axis) - Estimated Marginal Means"
  ),
  scale_x_continuous(
    breaks = c(1, 2, 3, 4),
    labels = c("Preferences", "Personality", "Values", "Life"),
    limits = c(0.5, 4.5)
  ),
  scale_color_manual(name = "Temporal Direction", values = time_colors),
  scale_fill_manual(name = "Temporal Direction", values = time_colors),
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)),
  theme_minimal(base_size = 13),
  theme(
    axis.text = element_text(size = 11),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )
)

interaction_plot_time_domain_plot4 <- ggplot(emmeans_time_domain_plot4) + plot4_layers

print(interaction_plot_time_domain_plot4)
|
||||||
|
|
||||||
|
# ============================================================
# PLOT 5: TIME MAIN EFFECT (Emmeans + Error Bars Only)
# ============================================================

# Tidy the TIME main-effect marginal means (time_emmeans computed earlier)
time_main_emm_df <- time_emmeans %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL), !is.na(upper.CL), !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    TIME = factor(TIME, levels = c("Past", "Future"))
  )

# Main-effect plot, styled to match the emmeans-only interaction plots
time_main_layers <- list(
  geom_errorbar(
    aes(x = TIME, ymin = ci_lower, ymax = ci_upper, color = TIME),
    width = 0.15,
    linewidth = 1,
    alpha = 0.8
  ),
  geom_point(
    aes(x = TIME, y = plot_mean, fill = TIME, shape = TIME),
    size = 5,
    stroke = 1.2,
    color = "black"
  ),
  labs(
    x = "Time",
    y = "Mean absolute difference from present",
    title = "Main Effect of TIME (Estimated Marginal Means)"
  ),
  scale_color_manual(name = "Temporal Direction", values = time_colors),
  scale_fill_manual(name = "Temporal Direction", values = time_colors),
  scale_shape_manual(name = "Temporal Direction", values = c(21, 22)),
  theme_minimal(base_size = 13),
  theme(
    axis.text = element_text(size = 11),
    axis.title = element_text(size = 12),
    plot.title = element_text(size = 14, hjust = 0.5),
    legend.position = "right",
    legend.title = element_text(size = 11),
    legend.text = element_text(size = 10),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_rect(color = "gray80", fill = NA, linewidth = 0.5)
  )
)

time_main_plot <- ggplot(time_main_emm_df) + time_main_layers

print(time_main_plot)
|
||||||
765
eohi1/mixed anova - personality.r
Normal file
@ -0,0 +1,765 @@
|
|||||||
|
# Mixed ANOVA Analysis for Personality Items
|
||||||
|
# EOHI Experiment Data Analysis - Item Level Analysis
|
||||||
|
# Variables: NPastDiff_pers_extravert, NPastDiff_pers_critical, NPastDiff_pers_dependable, NPastDiff_pers_anxious, NPastDiff_pers_complex
|
||||||
|
# NFutDiff_pers_extravert, NFutDiff_pers_critical, NFutDiff_pers_dependable, NFutDiff_pers_anxious, NFutDiff_pers_complex
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(tidyverse)
|
||||||
|
library(ez)
|
||||||
|
library(car)
|
||||||
|
library(afex) # For aov_ez (cleaner ANOVA output)
|
||||||
|
library(nortest) # For normality tests
|
||||||
|
library(emmeans) # For post-hoc comparisons
|
||||||
|
library(purrr) # For map functions
|
||||||
|
library(effsize) # For Cohen's d calculations
|
||||||
|
library(effectsize) # For effect size calculations
|
||||||
|
|
||||||
|
# Global options: disable scientific notation, and use sum/polynomial
# contrasts (sum-to-zero coding is required for interpretable Type-III
# mixed-ANOVA results).
options(scipen = 999, contrasts = c("contr.sum", "contr.poly"))

# NOTE(review): hard-coded absolute path — the script only runs as-is on the
# original author's machine.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the experiment-1 data
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# Basic shape / participant-count sanity checks
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))

# The five personality items, each measured as a past- and future-difference
# column; build the ten expected column names programmatically.
pers_items <- c("extravert", "critical", "dependable", "anxious", "complex")
required_vars <- c(paste0("NPastDiff_pers_", pers_items),
                   paste0("NFutDiff_pers_", pers_items))

# Verify the specific variables we need
missing_vars <- setdiff(required_vars, colnames(data))
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required personality item variables found!")
}

# Map each wide column name to its TIME level and item label
item_mapping <- data.frame(
  variable = required_vars,
  time = rep(c("Past", "Future"), each = 5),
  item = rep(pers_items, 2),
  stringsAsFactors = FALSE
)
|
||||||
|
|
||||||
|
# Item mapping created
|
||||||
|
|
||||||
|
# Reshape the ten wide item columns into long format: one row per
# pID × TIME × ITEM, with the difference score in MEAN_DIFFERENCE.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  # Attach TIME/ITEM labels from the mapping (columns 'time' and 'item')
  left_join(item_mapping, by = "variable") %>%
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("extravert", "critical", "dependable", "anxious", "complex")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Drop rows with a missing response, keep only the analysis columns
  filter(!is.na(MEAN_DIFFERENCE)) %>%
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE)

print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
|
||||||
|
|
||||||
|
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================

# Round-to-5-decimals shorthand used throughout the descriptives
r5 <- function(x) round(x, 5)

# Overall descriptive statistics per TIME × ITEM cell
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = r5(mean(MEAN_DIFFERENCE, na.rm = TRUE)),
    variance = r5(var(MEAN_DIFFERENCE, na.rm = TRUE)),
    sd = r5(sd(MEAN_DIFFERENCE, na.rm = TRUE)),
    median = r5(median(MEAN_DIFFERENCE, na.rm = TRUE)),
    q1 = r5(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE)),
    q3 = r5(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE)),
    min = r5(min(MEAN_DIFFERENCE, na.rm = TRUE)),
    max = r5(max(MEAN_DIFFERENCE, na.rm = TRUE)),
    .groups = 'drop'
  )

print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)

# Same descriptives split additionally by the between-subjects order factor
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = r5(mean(MEAN_DIFFERENCE, na.rm = TRUE)),
    variance = r5(var(MEAN_DIFFERENCE, na.rm = TRUE)),
    sd = r5(sd(MEAN_DIFFERENCE, na.rm = TRUE)),
    .groups = 'drop'
  )

print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================

# Working copy without missing responses (long_data is already filtered at
# creation, so this is a belt-and-braces pass)
long_data_clean <- long_data %>% filter(!is.na(MEAN_DIFFERENCE))
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))

# 1. Missing values check per TIME × ITEM cell
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print("Missing values by TIME and ITEM:")
print(missing_summary)

# 2. Outlier detection via the 1.5 × IQR rule, per TIME × ITEM cell
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print("Outlier summary (IQR method):")
print(outlier_summary)
|
||||||
|
|
||||||
|
# 3. Anderson-Darling normality test per TIME × ITEM cell.
# The test is run ONCE per cell and both the statistic and p-value are
# extracted from the same result object (previously ad.test was called
# twice per cell — wasteful and fragile).
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad, ~ .x$p.value)
  ) %>%
  select(-ad)

print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)
|
||||||
|
|
||||||
|
# 4. Homogeneity of variance (Levene's test).
# Each test is run ONCE per group and both F and p are pulled from the same
# result (previously leveneTest was evaluated twice per group).

# Test homogeneity across TIME within each ITEM
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-levene)

print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)

# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ ITEM)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(-levene)

print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# Hartley's F-max statistic: ratio of the largest to the smallest variance
# in `variances` (NAs ignored). A large ratio signals heterogeneity.
calculate_hartley_ratio <- function(variances) {
  bounds <- range(variances, na.rm = TRUE)
  bounds[2] / bounds[1]
}
|
||||||
|
|
||||||
|
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================

# For mixed ANOVA: Test homogeneity across the BETWEEN-SUBJECTS factor
# (TEMPORAL_DO) within each combination of within-subjects factors (TIME × ITEM)

# First, confirm which TEMPORAL_DO levels are actually present
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))

print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Variance of each TEMPORAL_DO order group within this TIME × ITEM cell
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Use the shared helper rather than duplicating max/min inline; the
    # helper's na.rm = TRUE also keeps an NA variance from propagating
    f_max_ratio = calculate_hartley_ratio(c(past_var, fut_var)),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# Bootstrap critical values for Hartley's F-max test.
#
# Args:
#   data         data frame (or tibble) containing `group_var` and `response_var`.
#   group_var    name (string) of the grouping column.
#   response_var name (string) of the numeric response column.
#   n_iter       number of bootstrap iterations (default 1000).
#
# Returns a list with the observed F-max ratio, the bootstrap 95th-percentile
# critical value, and the number of valid bootstrap iterations.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group levels
  groups <- unique(data[[group_var]])

  # Observed per-group variances
  observed_vars <- data %>%
    dplyr::group_by(!!sym(group_var)) %>%
    dplyr::summarise(var = var(!!sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Replace zero/NA variances with a tiny positive value so the ratio is finite
  if (any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }

  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Extract each group's response as a plain numeric vector, NAs removed.
  # (The previous `data[rows, response_var]` form returns a one-column tibble
  # — not a vector — when `data` arrives via dplyr::pick(), which breaks the
  # NA filter and sample() below; `[[` extraction is always a vector.)
  group_data_list <- map(groups, ~ {
    vals <- data[[response_var]][data[[group_var]] == .x]
    vals[!is.na(vals)]
  })

  # Resample each group independently and record the ratio of extreme variances
  for (i in seq_len(n_iter)) {
    sample_vars <- map_dbl(group_data_list, ~ {
      resampled <- sample(.x, size = length(.x), replace = TRUE)
      var(resampled, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Drop non-finite ratios (e.g. a resample with zero variance)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||||
|
|
||||||
|
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Run the bootstrap on just this cell's TEMPORAL_DO / response columns
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    # Unpack the list column: observed ratio, critical value, and the flag
    observed_ratio = map_dbl(hartley_result, "observed_ratio"),
    critical_95 = map_dbl(hartley_result, "critical_95"),
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================

# Cross-tabulations of the design.
# NOTE(review): these two top-level expressions are only auto-printed in an
# interactive session, not when the file is source()d.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
xtabs(~ pID + TIME + ITEM, data = long_data_clean)

# Structure summary before fitting
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
for (fac in c("TIME", "ITEM", "TEMPORAL_DO")) {
  print(paste(sprintf("Number of %s levels:", fac), length(levels(long_data_clean[[fac]]))))
}

# Complete-case count
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))

# Balance check: cell counts across pID × TIME × ITEM
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)
print(summary(as.vector(design_balance)))

# Participants missing any of the 10 expected TIME × ITEM combinations
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )

print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
|
||||||
|
|
||||||
|
# Mixed ANOVA, traditional aov() formulation.
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels:
#   extravert, critical, dependable, anxious, complex)
mixed_anova_model <- aov(
  MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
  data = long_data_clean
)

print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))

# Same model via afex::aov_ez, which gives a cleaner summary table
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(
  id = "pID",
  dv = "MEAN_DIFFERENCE",
  data = long_data_clean,
  between = "TEMPORAL_DO",
  within = c("TIME", "ITEM")
)

print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
|
||||||
|
|
||||||
|
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================

# Sphericity tests using ezANOVA (library already loaded).
# Each call is wrapped in tryCatch because ezANOVA errors out when the data
# do not match the requested within-subjects design; the error message is
# printed instead of aborting the script.

print("\n=== SPHERICITY TESTS ===")

# Test sphericity for ITEM (5 levels - within-subjects)
# NOTE(review): this call lists only ITEM as a within factor, but each id has
# multiple rows per ITEM (one per TIME level); ezANOVA may aggregate or error
# here — confirm this is the intended design for the Mauchly test.
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA; ezANOVA wants a numeric/plain id
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))

  # Run ezANOVA to get sphericity tests (type-3 sums of squares)
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)

  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)

}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})

# Test sphericity for TIME (2 levels - within-subjects).
# With only 2 levels sphericity holds trivially; Mauchly output may be empty.
# NOTE(review): relies on temp_data created inside the previous tryCatch —
# if that block failed before the assignment, this one fails too.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)

  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)

}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})

# Test sphericity for the full TIME × ITEM within-subjects design
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)

  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)

}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================

print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")

# Get corrected results from ezANOVA. The returned list carries the ANOVA
# table ($ANOVA), Mauchly's test ($Mauchly) and — when sphericity is violated —
# a `Sphericity Corrections` table with GG/HF epsilons and corrected p-values.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)

print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)

# Show epsilon values for sphericity corrections
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)

# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))

# Check if sphericity corrections are available
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")

  # Get the sphericity corrections and the uncorrected ANOVA table
  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  anova_table <- ez_corrected$ANOVA

  # Row position of each corrected effect in the ANOVA table, computed once
  # instead of repeating match() for every column below.
  idx <- match(sphericity_corr$Effect, anova_table$Effect)

  # Corrected df = original df × epsilon (Greenhouse-Geisser / Huynh-Feldt)
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[idx],
    Original_DFd = anova_table$DFd[idx],
    GG_DFn = anova_table$DFn[idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  # Also show the corrected F-values and p-values with degrees of freedom.
  # seq_len() (rather than 1:nrow) is safe if the corrections table is empty.
  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[idx[i]]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
|
||||||
|
|
||||||
|
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================

print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")

# Create a wide-format data for car package (library already loaded)

tryCatch({
  # Convert to wide format: one column per TIME × ITEM cell, named "TIME_ITEM"
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")

  # Create the repeated measures design
  # NOTE(review): this list has 8 domain-style names (4 items × 2 times),
  # but ITEM has 5 levels per the sphericity section above — so the
  # missing-columns branch below will always fire. Confirm these names
  # against the columns pivot_wider actually produces.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")

  # Check if all columns exist
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Create the repeated measures design
    rm_design <- as.matrix(wide_data[, within_vars])

    # Calculate epsilon values
    # NOTE(review): `epsilon()` is not an exported function of the car
    # package — confirm which package is expected to provide it; if it is
    # unavailable the surrounding tryCatch reports the error and skips the
    # rest of this branch (including the manual corrections below).
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")

    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))

    # Interpretation
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }

    # =============================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =============================================================================

    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")

    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")

    # ITEM main effect (DFn = 4, DFd = 4244)
    # NOTE(review): the df values 4 and 4244 are hard-coded from a previous
    # aov() run — update them if the sample size or design changes.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf

    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf

    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }

}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================

# Effect size calculations (library already loaded)

print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")

# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
  # Extract effect sizes from aov model (effectsize::eta_squared)
  # NOTE(review): round() over the whole returned table may error on its
  # non-numeric columns — that error would be swallowed by this tryCatch.
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  print(round(aov_effects, 5))
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})

# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA provides partial eta squared, convert to generalized
  ez_effects <- ez_corrected$ANOVA
  # This assignment is a deliberate no-op: the ges column is already
  # generalized eta squared as returned by ezANOVA.
  ez_effects$ges <- ez_effects$ges # ezANOVA already provides generalized eta squared
  print("Generalized Eta Squared from ezANOVA:")
  print(round(ez_effects[, c("Effect", "ges")], 5))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})

# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
# NOTE(review): anova_summary is computed here but never printed or used below.
anova_summary <- summary(mixed_anova_model)

# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================

# For this balanced repeated measures design, Type III ANOVA with proper
# sphericity corrections (implemented above) is the most appropriate approach.
# Mixed models (lmer) are typically used for:
# - Unbalanced designs
# - Missing data patterns
# - Nested random effects
# - Large, complex datasets
#
# Your design is balanced and complete, making Type III ANOVA optimal.

# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================

# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")

# Main effect of TIME: estimated marginal means + Bonferroni-adjusted pairs
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)

# Main effect of ITEM: estimated marginal means + Bonferroni-adjusted pairs
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)


# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================


# TIME × ITEM Interaction: cell means and simple-effect contrasts.
# The two `pairs(..., by = ...)` objects are reused by the Cohen's d
# section further down, so their names must not change.
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)

print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)

print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================


# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================

# Cohen's d calculations (library already loaded)

print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")

# Function to calculate Cohen's d for pairwise comparisons
#
# For every row of `pairs_df` (an emmeans pairwise-contrast table coerced to
# data frame) with p.value < 0.05, pulls the raw observations of the two
# contrasted levels of `group1_var` out of `data` — additionally restricted
# to the current level of `group2_var` when that column is present in the
# contrast table (i.e. the contrasts were computed with `by = group2_var`) —
# and reports effsize::cohen.d alongside the adjusted p-value. All output
# goes to the console via cat(); nothing is returned.
#
# NOTE(review): parsing assumes each contrast label is "<level1> - <level2>"
# and that the labels equal the raw factor values in `data`; emmeans can
# decorate labels for some factor types — confirm before reusing elsewhere.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label into its two factor levels
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions; condition on the `by` factor
        # when the contrast table carries it as a column
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        # Comparisons whose parsed levels match no rows are silently skipped
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): the p = 0.012 in this header is hard-coded from a previous
# run — confirm it still matches the ANOVA output above.

print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")

# Get simple effects of TIME within each ITEM
# (these contrasts were built with by = "ITEM", so ITEM is a column of the
# coerced data frame and is used as the conditioning factor)
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")

# Get simple effects of ITEM within each TIME
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
|
||||||
|
|
||||||
|
|
||||||
765
eohi1/mixed anova - preferences.r
Normal file
@ -0,0 +1,765 @@
|
|||||||
|
# Mixed ANOVA Analysis for Preference Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Variables: NPastDiff_pref_read, NPastDiff_pref_music, NPastDiff_pref_tv, NPastDiff_pref_nap, NPastDiff_pref_travel
# NFutDiff_pref_read, NFutDiff_pref_music, NFutDiff_pref_tv, NFutDiff_pref_nap, NFutDiff_pref_travel

# Load required libraries
library(tidyverse)
library(ez)          # ezANOVA: sphericity tests and corrections
library(car)         # leveneTest
library(afex) # For aov_ez (cleaner ANOVA output)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(effectsize) # For effect size calculations

# Global options to remove scientific notation
options(scipen = 999)

# Set contrasts to sum for mixed ANOVA (necessary for proper interpretation
# of Type III sums of squares)
options(contrasts = c("contr.sum", "contr.poly"))

# NOTE(review): machine-specific absolute path — parameterise before sharing
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Read the data
data <- read.csv("exp1.csv")

# Display basic information about the dataset
# NOTE(review): collapse = " x" prints e.g. "600 x21" (no space after the x)
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))

# Verify the specific variables we need
required_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
                   "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel")

missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required preference item variables found!")
}

# Define item mapping: one row per raw column, giving its (time, item) cell
item_mapping <- data.frame(
  variable = c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv", "NPastDiff_pref_nap", "NPastDiff_pref_travel",
               "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv", "NFutDiff_pref_nap", "NFutDiff_pref_travel"),
  time = c(rep("Past", 5), rep("Future", 5)),
  item = rep(c("read", "music", "tv", "nap", "travel"), 2),
  stringsAsFactors = FALSE
)

# Item mapping created

# Efficient data pivoting using pivot_longer: wide columns → one row per
# participant × TIME × ITEM cell with the difference score in MEAN_DIFFERENCE
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(item_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'item' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("read", "music", "tv", "nap", "travel")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))

print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
|
||||||
|
|
||||||
|
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================

# Overall descriptive statistics by TIME and ITEM
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)

# Descriptive statistics by between-subjects factors
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)


# =============================================================================
# ASSUMPTION TESTING
# =============================================================================

# Remove missing values for assumption testing
# (long_data was already filtered with !is.na(MEAN_DIFFERENCE) above, so this
# is a defensive no-op safeguard)
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))

# 1. Missing values check
# (counts will all be zero given the upstream NA filter; kept as a sanity check)
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print("Missing values by TIME and ITEM:")
print(missing_summary)

# 2. Outlier detection using the 1.5 × IQR fences per TIME × ITEM cell
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print("Outlier summary (IQR method):")
print(outlier_summary)

# 3. Anderson-Darling normality test (streamlined)
# NOTE(review): ad.test is evaluated twice per cell (statistic and p-value);
# harmless but could be computed once via a list-column if speed matters.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )

print("Anderson-Darling normality test results:")
# Round only the numeric columns
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)

# 4. Homogeneity of variance (Levene's test)
# Test homogeneity across TIME within each ITEM
# NOTE(review): the formula inside summarise relies on the dplyr data mask
# being the formula environment so leveneTest can find the columns — confirm
# this evaluates correctly under the installed dplyr version.
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)

# Test homogeneity across ITEM within each TIME
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
|
||||||
|
|
||||||
|
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================

# Hartley's F-max ratio: largest group variance divided by the smallest.
# Values near 1 indicate homogeneous variances; NAs are ignored.
calculate_hartley_ratio <- function(variances) {
  extremes <- range(variances, na.rm = TRUE)  # c(min, max)
  extremes[2] / extremes[1]
}
|
||||||
|
|
||||||
|
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================

# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × ITEM)

# First, let's check what values TEMPORAL_DO actually has
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))

print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × ITEM
    # combination.
    # NOTE(review): the level labels "01PAST"/"02FUT" are hard-coded — if the
    # check printed above shows different labels, these variances come out NA.
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio (largest variance over smallest)
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# More efficient bootstrap function for Hartley's F-max test
#
# Computes the observed F-max ratio (max group variance / min group variance)
# of `response_var` across the levels of `group_var`, then derives a 95%
# critical value by bootstrap: each group is resampled with replacement
# independently and the F-max ratio of the resampled variances recorded.
#
# Args:
#   data:         data.frame or tibble containing group_var and response_var
#   group_var:    name (string) of the grouping column
#   response_var: name (string) of the numeric response column
#   n_iter:       number of bootstrap iterations (default 1000)
#
# Returns a list with observed_ratio, critical_95 (95th percentile of the
# valid bootstrap ratios) and n_valid_iterations (finite, non-NA ratios).
#
# Fix: the previous version extracted group values with
# `data[data[[group_var]] == .x, response_var]`; on a tibble (which is what
# the call site passes via pick()) single-column `[` returns a one-column
# tibble, not a vector, so sample()/var() operated on the wrong object.
# Values are now extracted with `[[`, which yields a plain vector for both
# data.frames and tibbles. The purrr helpers were replaced with base R
# equivalents; the interface and returned structure are unchanged.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group labels present in the data
  groups <- unique(data[[group_var]])

  # NA-free response values for each group, extracted once up front
  group_values <- lapply(groups, function(g) {
    vals <- data[[response_var]][data[[group_var]] == g]
    vals[!is.na(vals)]
  })

  # Observed per-group variances
  observed_vars <- vapply(group_values, var, numeric(1))

  # Handle invalid (zero or NA) variances so the ratio stays finite
  bad <- observed_vars <= 0 | is.na(observed_vars)
  if (any(bad)) {
    observed_vars[bad] <- 1e-10
  }

  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Bootstrap distribution of the F-max ratio (pre-allocated storage)
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    # Resample each group independently and take the variance ratio
    sample_vars <- vapply(group_values, function(v) {
      var(sample(v, size = length(v), replace = TRUE), na.rm = TRUE)
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Remove invalid ratios (e.g. a zero-variance resample giving Inf/NA)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Calculate critical value (95th percentile)
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||||
|
|
||||||
|
# Hartley's F-max test across TEMPORAL_DO within each TIME × ITEM combination
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123) # For reproducibility of the bootstrap resampling

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # pick() hands the current group's two columns to the bootstrap helper;
    # the result list is stored as a list-column and unpacked below
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Observed ratio beyond the bootstrap 95th percentile → heterogeneous
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================

# Check for missing data patterns
# NOTE(review): when this script is source()d, bare top-level table()/xtabs()
# calls are computed but not printed — wrap in print() if output is wanted.
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")

# Check data balance
xtabs(~ pID + TIME + ITEM, data = long_data_clean)

# Check data dimensions and structure
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))

# Check for complete cases
complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))

# Check if design is balanced: per-participant cell counts should all equal 1
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)

print(summary(as.vector(design_balance)))

# Check for any participants with missing combinations
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10, # 2 TIME × 5 ITEM = 10
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )

print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])

# Mixed ANOVA using aov() - Traditional approach
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × ITEM (5 levels: read, music, tv, nap, travel)
# Error(pID/(TIME * ITEM)) declares participant as the random factor, with
# TIME, ITEM and their interaction varying within participants.
mixed_anova_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID/(TIME * ITEM)),
                         data = long_data_clean)

print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))

# Alternative: Using afex::aov_ez for cleaner output (optional)
# NOTE(review): afex applies a sphericity correction to p-values by default —
# confirm which correction the installed version reports.
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(id = "pID",
                           dv = "MEAN_DIFFERENCE",
                           data = long_data_clean,
                           between = "TEMPORAL_DO",
                           within = c("TIME", "ITEM"))

print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
|
||||||
|
|
||||||
|
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================

# Sphericity tests using ezANOVA (library already loaded)

print("\n=== SPHERICITY TESTS ===")

# Test sphericity for ITEM (5 levels - within-subjects)
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # Create a temporary data frame for ezANOVA
  temp_data <- long_data_clean
  # ezANOVA's wid needs a participant identifier; build a numeric id from pID
  temp_data$id <- as.numeric(as.factor(temp_data$pID))

  # Run ezANOVA to get sphericity tests (Mauchly table in $Mauchly)
  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)

  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)

}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})

# Test sphericity for TIME (2 levels - within-subjects)
# NOTE(review): with only 2 levels sphericity holds trivially, so ezANOVA is
# expected to return no Mauchly table here. temp_data comes from the previous
# tryCatch; if that block failed early, this one fails on a missing object.
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)

  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)

}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})

# Test sphericity for TIME × ITEM interaction
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)

  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)

}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================

print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")

# Within-subjects ANOVA via ezANOVA; it reports Mauchly's test and the GG/HF
# corrections alongside the uncorrected table.
# NOTE(review): TEMPORAL_DO is not passed as a between factor here, so these
# corrected tests ignore the between-subjects grouping — confirm intentional.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)

print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)

# The Mauchly element also carries the epsilon estimates behind the corrections.
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)

print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))

# ezANOVA only returns `Sphericity Corrections` when Mauchly's test could be
# computed for at least one effect.
if (!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)

  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")

  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  anova_table <- ez_corrected$ANOVA

  # Align each corrected effect with its row of the uncorrected table ONCE,
  # instead of re-running match() for every derived column.
  row_idx <- match(sphericity_corr$Effect, anova_table$Effect)
  dfn <- anova_table$DFn[row_idx]
  dfd <- anova_table$DFd[row_idx]

  # Corrected df = original df scaled by the epsilon estimate.
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = dfn,
    Original_DFd = dfd,
    GG_DFn = dfn * sphericity_corr$GGe,
    GG_DFd = dfd * sphericity_corr$GGe,
    HF_DFn = dfn * sphericity_corr$HFe,
    HF_DFd = dfd * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for (i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    # FIX: the df columns are doubles; sprintf("%d", x) errors in R whenever
    # x is not integer-valued, so format with %.0f which is safe either way.
    cat(sprintf(" Original: F(%.0f, %.0f) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
|
||||||
|
|
||||||
|
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================

print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")

tryCatch({
  # Reshape to wide: one column per TIME x ITEM cell, one row per participant.
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")

  # FIX: the repeated-measures column names were hard-coded to labels
  # ("Past_Preferences", ...) that pivot_wider never produces for this data,
  # so the missing-columns branch always fired. Build the names from the
  # actual factor levels instead, mirroring the names_from/names_sep
  # convention above ("<TIME>_<ITEM>").
  within_vars <- as.vector(outer(levels(long_data_clean$TIME),
                                 levels(long_data_clean$ITEM),
                                 paste, sep = "_"))

  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if (length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Matrix of repeated measures: rows = participants, cols = cells.
    rm_design <- as.matrix(wide_data[, within_vars])

    # NOTE(review): epsilon() is assumed to come from an attached package —
    # car itself does not export a function of this name; verify it exists,
    # otherwise this whole tryCatch falls through to the error handler.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")

    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))

    # Standard rule of thumb: GG epsilon below .75 -> use GG; otherwise HF.
    if (epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if (epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }

    # =========================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =========================================================================

    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")

    print("Applying Greenhouse-Geisser corrections to ITEM effects:")

    # Numerator df for ITEM = (number of ITEM levels - 1).
    # NOTE(review): the denominator df 4244 is hard-coded from a prior aov
    # run — re-check it against the current output before reporting.
    item_dfn <- length(levels(long_data_clean$ITEM)) - 1
    item_df_corrected_gg <- item_dfn * epsilon_gg
    item_df_corrected_hf <- item_dfn * epsilon_hf

    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # TIME x ITEM shares the same numerator df because TIME has 2 levels:
    # (2 - 1) * (levels(ITEM) - 1).
    interaction_df_corrected_gg <- item_dfn * epsilon_gg
    interaction_df_corrected_hf <- item_dfn * epsilon_hf

    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }
}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================

print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")

print("Effect Sizes from aov() model:")
tryCatch({
  # NOTE(review): effectsize::eta_squared treats `generalized` as overriding
  # `partial` — confirm generalized eta squared is what is wanted here.
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  # FIX: round() on the whole table errors (Math.data.frame) because it
  # contains character columns such as the effect names; round numeric
  # columns only, so the table actually prints instead of the error handler.
  aov_effects[] <- lapply(aov_effects,
                          function(col) if (is.numeric(col)) round(col, 5) else col)
  print(aov_effects)
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})

print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA already reports generalized eta squared in its `ges` column,
  # so no conversion is needed (a former no-op self-assignment was removed).
  ez_effects <- ez_corrected$ANOVA
  print("Generalized Eta Squared from ezANOVA:")
  # FIX: round only the numeric ges column; Effect is character and made the
  # old round() call error out.
  print(data.frame(Effect = ez_effects$Effect, ges = round(ez_effects$ges, 5)))
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})

# Kept for reference: the aov summary object (not otherwise used below).
anova_summary <- summary(mixed_anova_model)

# =============================================================================
# NOTE: MIXED MODELS (LMER) NOT NEEDED
# =============================================================================
# For this balanced repeated-measures design, Type III ANOVA with the
# sphericity corrections above is the appropriate approach. Mixed models
# (lmer) would be preferred for unbalanced designs, structured missingness,
# nested random effects, or very large/complex data — none of which apply.
|
||||||
|
|
||||||
|
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================

print("\n=== POST-HOC COMPARISONS ===")

# --- Main effect of TIME -----------------------------------------------------
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)

# --- Main effect of ITEM -----------------------------------------------------
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)

# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================

# --- TIME x ITEM cell means and simple effects -------------------------------
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)

# ITEM contrasts computed separately at each TIME level.
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)

# TIME contrasts computed separately at each ITEM level.
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
# =============================================================================

# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================

print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")

# For every significant (p < .05) contrast in pairs_df, recompute Cohen's d
# from the raw observations in `data`.
#   pairs_df     : data frame of emmeans contrasts (needs `contrast`, `p.value`)
#   data         : long-format raw data
#   group1_var   : factor whose levels appear in the contrast label
#   group2_var   : conditioning factor (present as a column of pairs_df when
#                  simple effects were computed with `by = group2_var`)
#   response_var : name of the numeric response column
# NOTE(review): cohen.d() here treats the two cells as independent samples,
# although TIME/ITEM are within-subject factors — confirm this is intended.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  sig_rows <- pairs_df[pairs_df$p.value < 0.05, ]

  if (nrow(sig_rows) == 0) {
    cat("No significant pairwise comparisons found.\n")
    return(invisible(NULL))
  }

  cat("Significant pairwise comparisons (p < 0.05):\n")
  print(sig_rows)

  cat("\nCohen's d calculated from raw data:\n")

  for (row_i in seq_len(nrow(sig_rows))) {
    this_row <- sig_rows[row_i, ]
    contrast_name <- as.character(this_row$contrast)

    # emmeans labels contrasts as "<levelA> - <levelB>".
    parts <- strsplit(contrast_name, " - ")[[1]]
    if (length(parts) != 2) next

    lvl_a <- trimws(parts[1])
    lvl_b <- trimws(parts[2])

    # When the contrast was conditioned on group2_var ("by ="), restrict the
    # raw data to that conditioning level as well.
    if (group2_var %in% colnames(this_row)) {
      cond_level <- as.character(this_row[[group2_var]])
      keep_cond <- data[[group2_var]] == cond_level
      x1 <- data[[response_var]][data[[group1_var]] == lvl_a & keep_cond]
      x2 <- data[[response_var]][data[[group1_var]] == lvl_b & keep_cond]
    } else {
      x1 <- data[[response_var]][data[[group1_var]] == lvl_a]
      x2 <- data[[response_var]][data[[group1_var]] == lvl_b]
    }

    if (length(x1) > 0 && length(x2) > 0) {
      # effsize::cohen.d returns the estimate plus a qualitative magnitude.
      d_res <- cohen.d(x1, x2)

      cat(sprintf("Comparison: %s", contrast_name))
      if (group2_var %in% colnames(this_row)) {
        cat(sprintf(" | %s", cond_level))
      }
      cat(sprintf("\n n1 = %d, n2 = %d\n", length(x1), length(x2)))
      cat(sprintf(" Cohen's d: %.5f\n", d_res$estimate))
      cat(sprintf(" Effect size interpretation: %s\n", d_res$magnitude))
      cat(sprintf(" p-value: %.5f\n", this_row$p.value))
      cat("\n")
    }
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================
# NOTE(review): the header says DOMAIN but the factor used below is ITEM, and
# the quoted p-value comes from a prior run — verify both against current
# output.

print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")

# Effect sizes for TIME differences within each ITEM level.
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")

# Effect sizes for ITEM differences within each TIME level.
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
|
||||||
|
|
||||||
|
|
||||||
765
eohi1/mixed anova - values.r
Normal file
@ -0,0 +1,765 @@
|
|||||||
|
# Mixed ANOVA Analysis for Values Items
# EOHI Experiment Data Analysis - Item Level Analysis
# Variables: NPastDiff_val_obey, NPastDiff_val_trad, NPastDiff_val_opinion, NPastDiff_val_performance, NPastDiff_val_justice
#            NFutDiff_val_obey, NFutDiff_val_trad, NFutDiff_val_opinion, NFutDiff_val_performance, NFutDiff_val_justice

# --- Libraries ---------------------------------------------------------------
library(tidyverse)
library(ez)          # ezANOVA / sphericity tests
library(car)         # Levene's test
library(afex)        # aov_ez (cleaner ANOVA output)
library(nortest)     # Anderson-Darling normality test
library(emmeans)     # post-hoc comparisons
library(purrr)       # map functions
library(effsize)     # Cohen's d
library(effectsize)  # effect size calculations

# Disable scientific notation in printed output.
options(scipen = 999)

# Sum-to-zero contrasts are required for meaningful Type III tests.
options(contrasts = c("contr.sum", "contr.poly"))

# NOTE(review): machine-specific absolute path — breaks on other machines.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Raw experiment data, one row per participant.
data <- read.csv("exp1.csv")
|
||||||
|
|
||||||
|
# --- Basic dataset checks ----------------------------------------------------
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x")))
print(paste("Number of participants:", length(unique(data$pID))))

# The ten values-item difference scores this script analyses (5 items x 2 times).
required_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
                   "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice")

# Warn (but do not stop) if any expected column is absent.
missing_vars <- required_vars[!required_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  print(paste("Warning: Missing variables:", paste(missing_vars, collapse = ", ")))
} else {
  print("All required values item variables found!")
}
|
||||||
|
|
||||||
|
# Lookup table mapping each raw column name to its TIME and ITEM labels.
item_mapping <- data.frame(
  variable = c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion", "NPastDiff_val_performance", "NPastDiff_val_justice",
               "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion", "NFutDiff_val_performance", "NFutDiff_val_justice"),
  time = c(rep("Past", 5), rep("Future", 5)),
  item = rep(c("obey", "trad", "opinion", "performance", "justice"), 2),
  stringsAsFactors = FALSE
)

# Reshape wide -> long: one row per participant x TIME x ITEM. Factor labels
# are attached from item_mapping and missing responses are dropped.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(item_mapping, by = "variable") %>%
  mutate(
    # Pin level order so "Past" precedes "Future" in every output table.
    TIME = factor(time, levels = c("Past", "Future")),
    ITEM = factor(item, levels = c("obey", "trad", "opinion", "performance", "justice")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  select(pID, ResponseId, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))

print(paste("Long data dimensions:", paste(dim(long_data), collapse = " x")))
print(paste("Number of participants:", length(unique(long_data$pID))))
|
||||||
|
|
||||||
|
# =============================================================================
# DESCRIPTIVE STATISTICS
# =============================================================================

# Cell-level summaries for every TIME x ITEM combination.
desc_stats <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print("Descriptive statistics by TIME and ITEM:")
print(desc_stats)

# Same summaries, additionally split by the between-subjects factor.
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print("Descriptive statistics by TEMPORAL_DO, TIME, and ITEM:")
print(desc_stats_by_temporal)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# ASSUMPTION TESTING
# =============================================================================

# Drop rows without a response before any assumption checks.
long_data_clean <- long_data[!is.na(long_data$MEAN_DIFFERENCE), ]
print(paste("Data after removing missing values:", paste(dim(long_data_clean), collapse = " x")))

# 1. How much is missing in each TIME x ITEM cell?
missing_summary <- long_data %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print("Missing values by TIME and ITEM:")
print(missing_summary)

# 2. Outliers per cell, flagged with the 1.5 * IQR fence rule.
outlier_summary <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print("Outlier summary (IQR method):")
print(outlier_summary)
|
||||||
|
|
||||||
|
# 3. Anderson-Darling normality test per TIME x ITEM cell.
normality_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )

print("Anderson-Darling normality test results:")
# Round numeric columns only (TIME/ITEM are factors).
normality_results_rounded <- normality_results %>%
  mutate(across(where(is.numeric), ~ round(.x, 5)))
print(normality_results_rounded)

# 4. Levene's test for homogeneity of variance.
# Across TIME, separately within each ITEM level.
homogeneity_time <- long_data_clean %>%
  group_by(ITEM) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print("Homogeneity of variance across TIME within each ITEM:")
print(homogeneity_time)

# Across ITEM, separately within each TIME level.
homogeneity_item <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ ITEM)$`Pr(>F)`[1],
    .groups = 'drop'
  )

print("Homogeneity of variance across ITEM within each TIME:")
print(homogeneity_item)
|
||||||
|
|
||||||
|
# =============================================================================
# HARTLEY'S F-MAX TEST WITH BOOTSTRAP CRITICAL VALUES
# =============================================================================

# Hartley's F-max statistic: ratio of the largest to the smallest group
# variance (NAs ignored). Values near 1 indicate homogeneous variances.
calculate_hartley_ratio <- function(variances) {
  biggest <- max(variances, na.rm = TRUE)
  smallest <- min(variances, na.rm = TRUE)
  biggest / smallest
}
|
||||||
|
|
||||||
|
# =============================================================================
# CALCULATE OBSERVED F-MAX RATIOS FOR MIXED ANOVA
# =============================================================================
# For a mixed design, homogeneity is checked across the BETWEEN-subjects
# factor (TEMPORAL_DO) inside every within-subjects cell (TIME x ITEM).

# Sanity-check the between-subjects coding before relying on its labels.
print("=== CHECKING TEMPORAL_DO VALUES ===")
print("Unique TEMPORAL_DO values:")
print(unique(long_data_clean$TEMPORAL_DO))
print("TEMPORAL_DO value counts:")
print(table(long_data_clean$TEMPORAL_DO))

print("\n=== OBSERVED F-MAX RATIOS: TEMPORAL_DO within each TIME × ITEM combination ===")

observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # Variance of each TEMPORAL_DO group inside this TIME x ITEM cell.
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # F-max = larger variance over the smaller one.
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, ITEM, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)
|
||||||
|
|
||||||
|
# Bootstrap critical value for Hartley's F-max test.
#
# Resamples each group independently (with replacement), recomputes the
# max/min variance ratio on every iteration, and returns the 95th percentile
# of the bootstrap distribution as the critical value.
#
#   data         : data frame (or tibble) with a grouping and a response column
#   group_var    : name of the grouping column
#   response_var : name of the numeric response column
#   n_iter       : number of bootstrap iterations
# Returns list(observed_ratio, critical_95, n_valid_iterations).
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # FIX: extract the response as a plain vector up front. The old code used
  # data[data[[group_var]] == g, response_var], which returns a one-column
  # tibble (not a vector) when `data` is a tibble — e.g. when called from
  # dplyr::pick() — corrupting sample() and var() below.
  response <- data[[response_var]]
  groups <- unique(data[[group_var]])

  # Per-group observations with NAs dropped, in first-appearance order.
  group_data_list <- lapply(groups, function(g) {
    vals <- response[data[[group_var]] == g]
    vals[!is.na(vals)]
  })

  # Observed per-group variances; clamp non-positive/NA variances to a tiny
  # positive value so the ratio stays defined.
  observed_vars <- vapply(group_data_list, function(v) var(v, na.rm = TRUE), numeric(1))
  observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10

  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocated bootstrap loop: resample each group independently,
  # preserving its sample size.
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    sample_vars <- vapply(group_data_list, function(v) {
      var(sample(v, size = length(v), replace = TRUE), na.rm = TRUE)
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Discard degenerate iterations (zero variance -> Inf/NaN ratios).
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value = 95th percentile of the bootstrap distribution.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  )
}
|
||||||
|
|
||||||
|
# Run the bootstrap F-max test across TEMPORAL_DO in every TIME x ITEM cell.
print("\n=== HARTLEY'S F-MAX TEST RESULTS ===")
set.seed(123)  # reproducible bootstrap draws

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, ITEM) %>%
  summarise(
    # pick() hands the grouped slice of the two needed columns to the helper.
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # An observed ratio beyond the bootstrap 95th percentile flags
    # heterogeneous variances in that cell.
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, ITEM, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||||
|
|
||||||
|
# =============================================================================
# MIXED ANOVA ANALYSIS
# =============================================================================

# Cross-tabulations to eyeball missing-data patterns and balance
# (top-level expressions, so they auto-print when the script is run).
table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM, useNA = "ifany")
xtabs(~ pID + TIME + ITEM, data = long_data_clean)

# Dimensions and factor structure going into the ANOVA.
print(paste("Data size for ANOVA:", nrow(long_data_clean), "rows"))
print(paste("Number of participants:", length(unique(long_data_clean$pID))))
print(paste("Number of TIME levels:", length(levels(long_data_clean$TIME))))
print(paste("Number of ITEM levels:", length(levels(long_data_clean$ITEM))))
print(paste("Number of TEMPORAL_DO levels:", length(levels(long_data_clean$TEMPORAL_DO))))

complete_cases <- long_data_clean[complete.cases(long_data_clean), ]
print(paste("Complete cases:", nrow(complete_cases), "out of", nrow(long_data_clean)))

# Every participant should contribute exactly one observation per cell.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$ITEM)

print(summary(as.vector(design_balance)))

# List participants missing any of the 10 TIME x ITEM combinations.
missing_combos <- long_data_clean %>%
  group_by(pID) %>%
  summarise(
    n_combinations = n(),
    expected_combinations = 10,  # 2 TIME x 5 ITEM
    missing_combinations = 10 - n_combinations,
    .groups = 'drop'
  )

print("Missing combinations per participant:")
print(missing_combos[missing_combos$missing_combinations > 0, ])
|
||||||
|
|
||||||
|
# Mixed ANOVA using aov() — traditional approach.
#   Between-subjects : TEMPORAL_DO (2 levels: 01PAST, 02FUT)
#   Within-subjects  : TIME (2 levels: Past, Future) × ITEM (5 levels: obey,
#                      trad, opinion, performance, justice)
mixed_anova_model <- aov(
  MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * ITEM + Error(pID / (TIME * ITEM)),
  data = long_data_clean
)

print("Mixed ANOVA Results (aov):")
print(summary(mixed_anova_model))

# Alternative fit with afex::aov_ez for cleaner, correction-aware output.
print("\n=== ALTERNATIVE: AFEX AOV_EZ RESULTS ===")
mixed_anova_afex <- aov_ez(
  id      = "pID",
  dv      = "MEAN_DIFFERENCE",
  data    = long_data_clean,
  between = "TEMPORAL_DO",
  within  = c("TIME", "ITEM")
)

print("Mixed ANOVA Results (afex):")
print(mixed_anova_afex)
|
||||||
|
|
||||||
|
# =============================================================================
# SPHERICITY TESTS FOR WITHIN-SUBJECTS FACTORS
# =============================================================================
# Mauchly's tests via ezANOVA; each factor's test is isolated in a tryCatch
# so a single failure cannot stop the script.

print("\n=== SPHERICITY TESTS ===")

# --- ITEM (5 within-subject levels) ------------------------------------------
print("Mauchly's Test of Sphericity for ITEM:")
tryCatch({
  # ezANOVA prefers a numeric subject identifier; derive one from pID.
  temp_data <- long_data_clean
  temp_data$id <- as.numeric(as.factor(temp_data$pID))

  ez_item <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = ITEM,
                     type = 3,
                     detailed = TRUE)

  print("ITEM Sphericity Test:")
  print(ez_item$Mauchly)
}, error = function(e) {
  print(paste("Error in ITEM sphericity test:", e$message))
})

# --- TIME (2 within-subject levels; sphericity holds trivially) --------------
print("\nMauchly's Test of Sphericity for TIME:")
tryCatch({
  # NOTE(review): relies on temp_data created in the previous tryCatch.
  ez_time <- ezANOVA(data = temp_data,
                     dv = MEAN_DIFFERENCE,
                     wid = id,
                     within = TIME,
                     type = 3,
                     detailed = TRUE)

  print("TIME Sphericity Test:")
  print(ez_time$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME sphericity test:", e$message))
})

# --- TIME x ITEM interaction -------------------------------------------------
print("\nMauchly's Test of Sphericity for TIME × ITEM Interaction:")
tryCatch({
  ez_interaction <- ezANOVA(data = temp_data,
                            dv = MEAN_DIFFERENCE,
                            wid = id,
                            within = .(TIME, ITEM),
                            type = 3,
                            detailed = TRUE)

  print("TIME × ITEM Sphericity Test:")
  print(ez_interaction$Mauchly)
}, error = function(e) {
  print(paste("Error in TIME × ITEM sphericity test:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# CORRECTED ANOVA RESULTS WITH SPHERICITY CORRECTIONS
# =============================================================================

print("\n=== CORRECTED ANOVA RESULTS (with sphericity corrections) ===")

# Get corrected results from ezANOVA
# NOTE(review): unlike mixed_anova_model above, this call omits the
# between-subjects factor TEMPORAL_DO, making it a purely within-subjects
# analysis -- confirm that is intended.
ez_corrected <- ezANOVA(data = long_data_clean,
                        dv = MEAN_DIFFERENCE,
                        wid = pID,
                        within = .(TIME, ITEM),
                        type = 3,
                        detailed = TRUE)

print("Corrected ANOVA Results with Sphericity Corrections:")
print(ez_corrected$ANOVA)

# Show epsilon values for sphericity corrections
# NOTE(review): $Mauchly holds the Mauchly W tests; the epsilon estimates
# themselves live in $`Sphericity Corrections` (used further below), so this
# label is slightly misleading.
print("\nEpsilon Values for Sphericity Corrections:")
print(ez_corrected$Mauchly)

# Show sphericity-corrected results
print("\nSphericity-Corrected Results:")
print("Available elements in ez_corrected object:")
print(names(ez_corrected))
|
||||||
|
|
||||||
|
# Check if sphericity corrections are available; if so, report corrected
# degrees of freedom and corrected F-tests, otherwise explain why not.
if(!is.null(ez_corrected$`Sphericity Corrections`)) {
  print("\nGreenhouse-Geisser and Huynh-Feldt Corrections:")
  print(ez_corrected$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  cat("\n=== CORRECTED DEGREES OF FREEDOM ===\n")

  # The sphericity corrections (epsilon estimates) and the uncorrected table
  sphericity_corr <- ez_corrected$`Sphericity Corrections`
  anova_table <- ez_corrected$ANOVA

  # Row positions of the corrected effects in the ANOVA table, computed ONCE
  # instead of repeating match() for every column as before.
  idx <- match(sphericity_corr$Effect, anova_table$Effect)

  # Corrected df = original df multiplied by the epsilon estimate
  # (GGe = Greenhouse-Geisser, HFe = Huynh-Feldt).
  corrected_df <- data.frame(
    Effect       = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[idx],
    Original_DFd = anova_table$DFd[idx],
    GG_DFn       = anova_table$DFn[idx] * sphericity_corr$GGe,
    GG_DFd       = anova_table$DFd[idx] * sphericity_corr$GGe,
    HF_DFn       = anova_table$DFn[idx] * sphericity_corr$HFe,
    HF_DFd       = anova_table$DFd[idx] * sphericity_corr$HFe,
    GG_epsilon   = sphericity_corr$GGe,
    HF_epsilon   = sphericity_corr$HFe
  )

  print(corrected_df)

  # Also show the corrected F-values and p-values with degrees of freedom
  cat("\n=== CORRECTED F-TESTS WITH DEGREES OF FREEDOM ===\n")
  for(i in 1:nrow(corrected_df)) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    # BUG FIX: the original used %d for Original_DFn/DFd, but these are
    # numeric (double) columns and sprintf("%d", <double>) is a runtime error
    # in R; %.0f prints them as the whole numbers they are.
    cat(sprintf("  Original: F(%.0f, %.0f) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  print("\nNote: Sphericity corrections may not be displayed if sphericity is met")
  print("Check the Mauchly's test p-values above to determine if corrections are needed")
}
|
||||||
|
|
||||||
|
# =============================================================================
# ALTERNATIVE: SPHERICITY CORRECTIONS USING CAR PACKAGE
# =============================================================================

print("\n=== SPHERICITY CORRECTIONS USING CAR PACKAGE ===")

# Create a wide-format data for car package (library already loaded)

tryCatch({
  # Convert to wide format: one column per TIME_ITEM cell, one row per subject
  wide_data <- long_data_clean %>%
    select(pID, TEMPORAL_DO, TIME, ITEM, MEAN_DIFFERENCE) %>%
    pivot_wider(names_from = c(TIME, ITEM),
                values_from = MEAN_DIFFERENCE,
                names_sep = "_")

  # Create the repeated measures design
  # NOTE(review): these names contradict the ITEM levels stated earlier
  # (read, music, tv, nap, travel => 2 x 5 = 10 columns such as "Past_read"),
  # so the missing-column check below is likely to trigger -- verify.
  within_vars <- c("Past_Preferences", "Past_Personality", "Past_Values", "Past_Life",
                   "Future_Preferences", "Future_Personality", "Future_Values", "Future_Life")

  # Check if all columns exist before indexing into wide_data
  missing_cols <- within_vars[!within_vars %in% colnames(wide_data)]
  if(length(missing_cols) > 0) {
    print(paste("Missing columns for car analysis:", paste(missing_cols, collapse = ", ")))
  } else {
    # Subjects x cells matrix for the repeated-measures design
    rm_design <- as.matrix(wide_data[, within_vars])

    # Calculate epsilon values
    # NOTE(review): car does not export an epsilon() function; unless
    # epsilon() is defined elsewhere in this project, these calls throw and
    # are swallowed by the enclosing tryCatch -- confirm its source.
    print("Epsilon Values from car package:")
    epsilon_gg <- epsilon(rm_design, type = "Greenhouse-Geisser")
    epsilon_hf <- epsilon(rm_design, type = "Huynh-Feldt")

    print(paste("Greenhouse-Geisser epsilon:", round(epsilon_gg, 4)))
    print(paste("Huynh-Feldt epsilon:", round(epsilon_hf, 4)))

    # Interpretation: conventional rule of thumb around epsilon = .75
    if(epsilon_gg < 0.75) {
      print("Recommendation: Use Greenhouse-Geisser correction (epsilon < 0.75)")
    } else if(epsilon_hf > 0.75) {
      print("Recommendation: Use Huynh-Feldt correction (epsilon > 0.75)")
    } else {
      print("Recommendation: Use Greenhouse-Geisser correction (conservative)")
    }

    # =========================================================================
    # MANUAL SPHERICITY CORRECTIONS
    # =========================================================================

    print("\n=== MANUAL SPHERICITY CORRECTIONS ===")

    # Apply corrections to the original ANOVA results
    print("Applying Greenhouse-Geisser corrections to ITEM effects:")

    # ITEM main effect (DFn = 4, DFd = 4244)
    # NOTE(review): 4 and 4244 are hard-coded from a previous run's output
    # and go stale if the data change.
    item_df_corrected_gg <- 4 * epsilon_gg
    item_df_corrected_hf <- 4 * epsilon_hf

    print(paste("ITEM: Original df = 4, 4244"))
    print(paste("ITEM: GG corrected df =", round(item_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("ITEM: HF corrected df =", round(item_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # TIME × ITEM interaction (DFn = 4, DFd = 4244)
    interaction_df_corrected_gg <- 4 * epsilon_gg
    interaction_df_corrected_hf <- 4 * epsilon_hf

    print(paste("TIME × ITEM: Original df = 4, 4244"))
    print(paste("TIME × ITEM: GG corrected df =", round(interaction_df_corrected_gg, 2), ",", round(4244 * epsilon_gg, 2)))
    print(paste("TIME × ITEM: HF corrected df =", round(interaction_df_corrected_hf, 2), ",", round(4244 * epsilon_hf, 2)))

    # Note: You would need to recalculate p-values with these corrected dfs
    print("\nNote: To get corrected p-values, you would need to recalculate F-tests with corrected degrees of freedom")
    print("The ezANOVA function should handle this automatically, but may not display the corrections")
  }

}, error = function(e) {
  print(paste("Error in manual epsilon calculation:", e$message))
})
|
||||||
|
|
||||||
|
# =============================================================================
# EFFECT SIZES (GENERALIZED ETA SQUARED)
# =============================================================================

# Effect size calculations (library already loaded)

print("\n=== EFFECT SIZES (GENERALIZED ETA SQUARED) ===")

# Calculate generalized eta squared for the aov model
print("Effect Sizes from aov() model:")
tryCatch({
  # Extract effect sizes from aov model
  # NOTE(review): in effectsize::eta_squared, `partial` and `generalized`
  # select different statistics; passing both may warn or let one take
  # precedence -- confirm which measure is wanted.
  aov_effects <- eta_squared(mixed_anova_model, partial = TRUE, generalized = TRUE)
  # BUG FIX: round() errors on data frames containing character columns
  # (the effect labels); round only the numeric columns before printing.
  num_cols <- sapply(aov_effects, is.numeric)
  aov_effects[num_cols] <- round(aov_effects[num_cols], 5)
  print(aov_effects)
}, error = function(e) {
  print(paste("Error calculating effect sizes from aov:", e$message))
})

# Calculate effect sizes for ezANOVA model
print("\nEffect Sizes from ezANOVA model:")
tryCatch({
  # ezANOVA already reports generalized eta squared in the `ges` column.
  # (The original re-assigned ez_effects$ges to itself -- a no-op, removed.)
  ez_effects <- ez_corrected$ANOVA
  print("Generalized Eta Squared from ezANOVA:")
  # BUG FIX: as above, round only the numeric ges column, not the
  # character Effect column.
  ez_effects$ges <- round(ez_effects$ges, 5)
  print(ez_effects[, c("Effect", "ges")])
}, error = function(e) {
  print(paste("Error extracting effect sizes from ezANOVA:", e$message))
})

# Extract effect sizes (generalized eta squared)
# For aov() objects, we need to extract from the summary
anova_summary <- summary(mixed_anova_model)
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# NOTE: MIXED MODELS (LMER) NOT NEEDED
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
# For this balanced repeated measures design, Type III ANOVA with proper
|
||||||
|
# sphericity corrections (implemented above) is the most appropriate approach.
|
||||||
|
# Mixed models (lmer) are typically used for:
|
||||||
|
# - Unbalanced designs
|
||||||
|
# - Missing data patterns
|
||||||
|
# - Nested random effects
|
||||||
|
# - Large, complex datasets
|
||||||
|
#
|
||||||
|
# Your design is balanced and complete, making Type III ANOVA optimal.
|
||||||
|
|
||||||
|
# =============================================================================
# POST-HOC COMPARISONS
# =============================================================================

# Post-hoc comparisons using emmeans
print("\n=== POST-HOC COMPARISONS ===")

# Main effect of TIME: estimated marginal means averaged over the other factors
print("Main Effect of TIME:")
time_emmeans <- emmeans(mixed_anova_model, ~ TIME)
print("Estimated Marginal Means:")
print(time_emmeans)
print("\nPairwise Contrasts:")
# Bonferroni-adjusted pairwise contrasts between TIME levels
time_contrasts <- pairs(time_emmeans, adjust = "bonferroni")
print(time_contrasts)

# Main effect of ITEM
print("\nMain Effect of ITEM:")
item_emmeans <- emmeans(mixed_anova_model, ~ ITEM)
print("Estimated Marginal Means:")
print(item_emmeans)
print("\nPairwise Contrasts:")
# All pairwise ITEM contrasts, Bonferroni-adjusted
item_contrasts <- pairs(item_emmeans, adjust = "bonferroni")
print(item_contrasts)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# INTERACTION EXPLORATIONS
# =============================================================================

# TIME × ITEM Interaction: cell means for every TIME x ITEM combination
print("\n=== TIME × ITEM INTERACTION ===")
time_item_emmeans <- emmeans(mixed_anova_model, ~ TIME * ITEM)
print("Estimated Marginal Means:")
print(time_item_emmeans)

# Pairwise ITEM contrasts computed separately within each TIME level
# (consumed later by the Cohen's d section)
print("\nSimple Effects of ITEM within each TIME:")
time_item_simple <- pairs(time_item_emmeans, by = "TIME", adjust = "bonferroni")
print(time_item_simple)

# Pairwise TIME contrasts computed separately within each ITEM level
print("\nSimple Effects of TIME within each ITEM:")
time_item_simple2 <- pairs(time_item_emmeans, by = "ITEM", adjust = "bonferroni")
print(time_item_simple2)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# COMPREHENSIVE THREE-WAY INTERACTION ANALYSIS
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS
# =============================================================================

# Cohen's d calculations (library already loaded)

print("\n=== COHEN'S D FOR SIGNIFICANT TWO-WAY INTERACTIONS ===")

# Function to calculate Cohen's d for pairwise comparisons
#
# Given a data frame of emmeans pairwise contrasts (as produced by
# as.data.frame(pairs(...))), recompute Cohen's d from the raw data for every
# contrast with p < .05 and print it alongside the adjusted p-value.
#
# Args:
#   pairs_df:     contrast data frame; must have `contrast` and `p.value`
#                 columns, and optionally a column named after group2_var
#                 (present when contrasts were computed with by = group2_var).
#   data:         raw long-format data frame to pull observations from.
#   group1_var:   factor whose levels appear in the contrast strings
#                 (e.g. "TIME" for a contrast like "Past - Future").
#   group2_var:   conditioning ("by") factor name, if any.
#   response_var: numeric response column name.
#
# Side effects: prints to the console; no meaningful return value.
#
# NOTE(review): the contrast label is split on " - ", so this breaks if a
# factor level itself contains " - ". It also assumes the level labels in the
# contrast strings match the values stored in data[[group1_var]] -- verify
# against the actual factor labels.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  # Keep only contrasts that reached significance
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label "level1 - level2" back into its two levels
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions; when the contrast was computed
        # within levels of group2_var, also condition on that level
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        # Silently skipped when either group is empty (e.g. label mismatch)
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================================
# 2. TIME × DOMAIN INTERACTION (SIGNIFICANT: p = 0.012)
# =============================================================================

print("\n=== COHEN'S D FOR TIME × DOMAIN INTERACTION ===")

# Get simple effects of TIME within each ITEM
# (time_item_simple2 was computed above with pairs(..., by = "ITEM"))
time_item_simple2_df <- as.data.frame(time_item_simple2)
calculate_cohens_d_for_pairs(time_item_simple2_df, long_data_clean, "TIME", "ITEM", "MEAN_DIFFERENCE")

# Get simple effects of ITEM within each TIME
# (time_item_simple was computed above with pairs(..., by = "TIME"))
time_item_simple_df <- as.data.frame(time_item_simple)
calculate_cohens_d_for_pairs(time_item_simple_df, long_data_clean, "ITEM", "TIME", "MEAN_DIFFERENCE")
|
||||||
|
|
||||||
|
|
||||||
BIN
eohi1/normality_plots.pdf
Normal file
10
eohi1/pearson_correlations.csv
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
"","NPast_mean_total","NFut_mean_total","DGEN_past_mean","DGEN_fut_mean","domain_mean","DGEN_mean","AOT_total","CRT_correct","CRT_int"
|
||||||
|
"NPast_mean_total",1,0.58407,0.37645,0.19034,0.91767,0.33077,0.04531,-0.00144,-0.01053
|
||||||
|
"NFut_mean_total",0.58407,1,0.31658,0.32376,0.85851,0.37059,-0.04928,-0.074,0.05859
|
||||||
|
"DGEN_past_mean",0.37645,0.31658,1,0.77074,0.3928,0.92613,-0.20675,-0.04166,0.0038
|
||||||
|
"DGEN_fut_mean",0.19034,0.32376,0.77074,1,0.27874,0.92466,-0.28285,-0.0818,0.03967
|
||||||
|
"domain_mean",0.91767,0.85851,0.3928,0.27874,1,0.39038,0.0045,-0.03713,0.02203
|
||||||
|
"DGEN_mean",0.33077,0.37059,0.92613,0.92466,0.39038,1,-0.23213,-0.05422,0.01483
|
||||||
|
"AOT_total",0.04531,-0.04928,-0.20675,-0.28285,0.0045,-0.23213,1,0.24279,-0.20318
|
||||||
|
"CRT_correct",-0.00144,-0.074,-0.04166,-0.0818,-0.03713,-0.05422,0.24279,1,-0.89661
|
||||||
|
"CRT_int",-0.01053,0.05859,0.0038,0.03967,0.02203,0.01483,-0.20318,-0.89661,1
|
||||||
|
232
eohi1/regression e1 - edu x ehi.r
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
# Disable scientific notation in printed output
options(scipen = 999)

# dplyr: data wrangling; car: vif()/outlierTest(); lmtest: dwtest()/coeftest();
# stargazer: formatted regression tables; sandwich: robust (HC) covariance.
# BUG FIX: library(lmtest) was loaded twice; the duplicate call was removed.
library(dplyr)
library(car)
library(lmtest)
library(stargazer)
library(sandwich)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
df <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
data <- df %>%
|
||||||
|
select(eohiDGEN_mean, ehi_global_mean, demo_edu) %>%
|
||||||
|
mutate(demo_edu = as.factor(demo_edu))
|
||||||
|
|
||||||
|
# examine data object
|
||||||
|
str(data)
|
||||||
|
colSums(is.na(data))
|
||||||
|
sapply(data, class)
|
||||||
|
levels(data$demo_edu)
|
||||||
|
|
||||||
|
data$demo_edu <- factor(data$demo_edu, levels = c(
|
||||||
|
"High School (or equivalent)",
|
||||||
|
"Trade School (non-military)",
|
||||||
|
"College Diploma/Certificate",
|
||||||
|
"University - Undergraduate",
|
||||||
|
"University - Graduate (Masters)",
|
||||||
|
"University - PhD",
|
||||||
|
"Professional Degree (ex. JD/MD)"
|
||||||
|
))
|
||||||
|
|
||||||
|
levels(data$demo_edu)
|
||||||
|
# Build one 0/1 indicator column per education level (no intercept), give the
# columns readable names, and append every level except the High School
# reference to `data`. Column order follows the factor level order set above.
indicator_df <- as.data.frame(model.matrix(~ demo_edu - 1, data = data))
edu_names <- c(
  "edu_highschool",    # reference level, excluded from the model data below
  "edu_trade",
  "edu_college",
  "edu_uni_undergrad",
  "edu_uni_masters",
  "edu_uni_phd",
  "edu_prof"
)
colnames(indicator_df) <- edu_names

# Attach only the non-reference indicators, so `data` ends up with
# edu_trade .. edu_prof and no edu_highschool column.
data <- cbind(data, indicator_df[, setdiff(edu_names, "edu_highschool")])
|
||||||
|
|
||||||
|
#### MODEL 1 - DGEN ####

# EOHI-DGEN regressed on the six education dummies (High School = reference)
model_DGEN <- lm(eohiDGEN_mean ~ edu_trade + edu_college + edu_uni_undergrad +
                   edu_uni_masters + edu_uni_phd + edu_prof, data = data)

# Model 1 diagnostics
par(mfrow = c(2, 2))
plot(model_DGEN, which = 1) # Residuals vs Fitted
plot(model_DGEN, which = 2) # Normal Q-Q, normality
hist(residuals(model_DGEN), main = "Histogram of Residuals", xlab = "Residuals")
shapiro.test(residuals(model_DGEN))

plot(model_DGEN, which = 3) # Scale-Location
plot(model_DGEN, which = 4) # Cook's Distance

# Model 1 specific tests
vif(model_DGEN) # Multicollinearity
dwtest(model_DGEN) # Independence
outlierTest(model_DGEN) # Outliers

# Look at the specific influential cases
# NOTE(review): hard-coded row numbers taken from a previous outlierTest run;
# they go stale if the data change.
data[c(670, 388, 760), ]

# 6 outliers: 670, 388, 760, 258, 873, 1030; acknowledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable.
# heteroscedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity.
# normality violated but sample size is robust to violation
# no multicollinearity
# no autocorrelation (samples are independent)

# results
print(summary(model_DGEN))
print(AIC(model_DGEN))
|
||||||
|
|
||||||
|
# Create a nice formatted table (report = "vcsp": values, coefficients,
# standard errors, p-values)
stargazer(model_DGEN, type = "text",
          title = "Regression Results: Education and EOHI-DGEN",
          dep.var.labels = "EOHI-DGEN Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp",
          add.lines = list(c("AIC", round(AIC(model_DGEN), 2))))

# Use robust standard errors (doesn't change coefficients, just SEs)
# HC3 heteroscedasticity-consistent covariance from the sandwich package
modelDGEN_robust <- coeftest(model_DGEN, vcov = vcovHC(model_DGEN, type = "HC3"))

stargazer(modelDGEN_robust, type = "text",
          title = "Regression Results: Education and EOHI-DGEN",
          dep.var.labels = "EOHI-DGEN Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp")
|
||||||
|
|
||||||
|
|
||||||
|
#### MODEL 2 - DOMAIN ####

# EHI global mean regressed on the same six education dummies
model_domain <- lm(ehi_global_mean ~ edu_trade + edu_college + edu_uni_undergrad +
                     edu_uni_masters + edu_uni_phd + edu_prof, data = data)

# ASSUMPTION CHECKING FOR MODEL 2 (model_domain)
plot(model_domain, which = 1) # Residuals vs Fitted

plot(model_domain, which = 2) # Normal Q-Q, normality
hist(residuals(model_domain), main = "Histogram of Residuals", xlab = "Residuals")
shapiro.test(residuals(model_domain))

plot(model_domain, which = 3) # Scale-Location
plot(model_domain, which = 4) # Cook's Distance

# Model 2 specific tests
vif(model_domain) # Multicollinearity
dwtest(model_domain) # Independence
outlierTest(model_domain) # Outliers

# Check if the autocorrelation is real or artifactual
# Plot residuals against observation order
plot(residuals(model_domain), type = "l")
abline(h = 0, col = "red")

# 6 outliers: acknowledge their presence but also they represent ~0.58% of total sample size, which is well below the 5% of outliers that would be considered acceptable.
# heteroscedasticity: may be d/t binary vars creating discrete clusters, or d/t real heteroscedasticity.
# normality violated but sample size is robust to violation
# no multicollinearity
# autocorrelation is significant, may be due to aggregated measure of multiple repeated measures

# Reset plotting to 1x1
# par(mfrow = c(1, 1))

print(summary(model_domain))
print(AIC(model_domain))

stargazer(model_domain, type = "text",
          title = "Regression Results: Education and EHI Domain",
          dep.var.labels = "EHI Domain Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp", # This shows coefficients, SEs, and p-values
          add.lines = list(c("AIC", round(AIC(model_domain), 2))))

# Use robust standard errors (doesn't change coefficients, just SEs)
modelDOMAIN_robust <- coeftest(model_domain, vcov = vcovHC(model_domain, type = "HC3"))

stargazer(modelDOMAIN_robust, type = "text",
          title = "Regression Results: Education and EHI Domain",
          dep.var.labels = "EHI Domain Mean",
          covariate.labels = c("Trade School", "College", "University Undergrad",
                               "University Masters", "University PhD", "Professional Degree"),
          report = "vcsp")
|
||||||
|
|
||||||
|
#### PLOTS ####

# ggplot2 for plotting; dplyr is already attached at the top of this script,
# so the duplicate library(dplyr) call was removed.
library(ggplot2)

# Mean and 95% confidence interval (normal approximation, mean +/- 1.96*SE)
# of EOHI-DGEN per education level
edu_summary_DGEN <- data %>%
  group_by(demo_edu) %>%
  summarise(
    mean_DGEN = mean(eohiDGEN_mean, na.rm = TRUE),
    n = n(),
    se_DGEN = sd(eohiDGEN_mean, na.rm = TRUE) / sqrt(n()),
    ci_lower_DGEN = mean_DGEN - 1.96 * se_DGEN,
    ci_upper_DGEN = mean_DGEN + 1.96 * se_DGEN
  )

# Same summary for the EHI Domain outcome
edu_summary_domain <- data %>%
  group_by(demo_edu) %>%
  summarise(
    mean_domain = mean(ehi_global_mean, na.rm = TRUE),
    n = n(),
    se_domain = sd(ehi_global_mean, na.rm = TRUE) / sqrt(n()),
    ci_lower_domain = mean_domain - 1.96 * se_domain,
    ci_upper_domain = mean_domain + 1.96 * se_domain
  )

# Plot 1: EOHI-DGEN means with confidence intervals
p1 <- ggplot(edu_summary_DGEN, aes(x = demo_edu, y = mean_DGEN)) +
  geom_point(size = 3, color = "steelblue") +
  geom_errorbar(aes(ymin = ci_lower_DGEN, ymax = ci_upper_DGEN),
                width = 0.1, color = "steelblue", linewidth = 1) +
  labs(
    title = "Mean EOHI-DGEN by Education Level",
    subtitle = "Error bars show 95% confidence intervals",
    x = "Education Level",
    y = "Mean EOHI-DGEN Score"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )

# Plot 2: EHI Domain means with confidence intervals
p2 <- ggplot(edu_summary_domain, aes(x = demo_edu, y = mean_domain)) +
  geom_point(size = 3, color = "darkgreen") +
  geom_errorbar(aes(ymin = ci_lower_domain, ymax = ci_upper_domain),
                width = 0.1, color = "darkgreen", linewidth = 1) +
  labs(
    title = "Mean EHI Domain by Education Level",
    subtitle = "Error bars show 95% confidence intervals",
    x = "Education Level",
    y = "Mean EHI Domain Score"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )

# Display the plots
print(p1)
print(p2)

# Save the plots as 10x6in, 300dpi PNGs in the working directory
ggsave("education_DGEN_means.png", p1, width = 10, height = 6, dpi = 300)
ggsave("education_domain_means.png", p2, width = 10, height = 6, dpi = 300)
|
||||||
320
eohi1/regression e1 - ehi x sex x age.r
Normal file
@ -0,0 +1,320 @@
|
|||||||
|
# Disable scientific notation in printed output
options(scipen = 999)

# dplyr: data wrangling; car: diagnostics; lmtest: dwtest()/coeftest();
# stargazer: formatted tables; sandwich: robust (HC) covariance.
# BUG FIX: library(lmtest) was loaded twice; the duplicate call was removed.
library(dplyr)
library(car)
library(lmtest)
library(stargazer)
library(sandwich)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||||
|
|
||||||
|
df <- read.csv("ehi1.csv")
|
||||||
|
|
||||||
|
data <- df %>%
|
||||||
|
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1) %>%
|
||||||
|
filter(demo_sex != "Prefer not to say")
|
||||||
|
|
||||||
|
str(data)
|
||||||
|
colSums(is.na(data))
|
||||||
|
sapply(data, class)
|
||||||
|
|
||||||
|
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||||
|
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||||
|
|
||||||
|
# Verify the dummy coding
|
||||||
|
print(table(data$demo_sex, data$sex_dummy))
|
||||||
|
|
||||||
|
#### Descriptives ####

# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))

# Center demo_age_1 (subtract the mean) so that, in the models below, the
# intercept and the sex main effect are evaluated at the sample mean age
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)

# Verify the centering (mean should be ~0)
print(summary(data$age_centered))

# Descriptives for sex (frequency table and proportions)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))

# Descriptives for sex dummy variable (should mirror the table above)
print(table(data$sex_dummy))
|
||||||
|
|
||||||
|
#### REGRESSION MODELS ####

# MODEL 1: Age only - EOHI (DV: eohiDGEN_mean)
age_DGEN <- lm(eohiDGEN_mean ~ age_centered, data = data)
# Base-R diagnostic panel: residuals vs fitted, QQ, scale-location, leverage
par(mfrow = c(2, 2))
plot(age_DGEN)
# Shapiro-Wilk normality test on residuals (very sensitive at large n)
print(shapiro.test(residuals(age_DGEN)))
print(summary(age_DGEN))
print(AIC(age_DGEN))
|
||||||
|
|
||||||
|
# MODEL 1: Age only - EHI (DV: ehi_global_mean)
age_domain <- lm(ehi_global_mean ~ age_centered, data = data)
# Base-R diagnostic panel for the four standard lm assumption plots
par(mfrow = c(2, 2))
plot(age_domain)
# Shapiro-Wilk normality test on residuals (very sensitive at large n)
print(shapiro.test(residuals(age_domain)))
print(summary(age_domain))
print(AIC(age_domain))
|
||||||
|
|
||||||
|
# MODEL 2: Sex only - EOHI (DV: eohiDGEN_mean)
sex_DGEN <- lm(eohiDGEN_mean ~ sex_dummy, data = data)
par(mfrow = c(2, 2))
plot(sex_DGEN)
print(shapiro.test(residuals(sex_DGEN)))
print(summary(sex_DGEN))
print(AIC(sex_DGEN))

# Interpretation of the diagnostic panel above:
# P1 (res vs fitted) + P3 (scale location): test for homoscedasticity. relatively flat red line = homoscedasticity. relatively scattered points = homoscedasticity. this assumption is met.
# P2 (qq plot): test for normality. points scattered around a relatively straight line = normality. this assumption is violated but large sample is robust.
# P4 (residuals vs leverage): test for outliers. high leverage points = outliers. leverage > 2p/n.
# p = parameters; for this model p = 2 (intercept + sex_dummy). n = 1061 (removed prefer not to say). threshold = 2*2/1061 = 0.00377. maximum leverage in plot is ~ 0.002 therefore no points have concerning leverage.
# across the plots, there are 3 outliers: 258, 670, 872. this represents 0.28% of the data (much less than the acceptable threshold of 5%). therefore, analysis can proceed.
|
||||||
|
|
||||||
|
# MODEL 2: Sex only - EHI (DV: ehi_global_mean)
sex_domain <- lm(ehi_global_mean ~ sex_dummy, data = data)
# Diagnostic panel; interpretation follows the same logic as the sex_DGEN
# model above (see the P1–P4 notes there)
par(mfrow = c(2, 2))
plot(sex_domain)
print(shapiro.test(residuals(sex_domain)))
print(summary(sex_domain))
print(AIC(sex_domain))
|
||||||
|
|
||||||
|
# MODEL 3: Age + Sex + Interaction - EOHI (DV: eohiDGEN_mean)
interaction_DGEN <- lm(eohiDGEN_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data)
# Diagnostic panel (residuals vs fitted, QQ, scale-location, leverage)
par(mfrow = c(2, 2))
plot(interaction_DGEN)
print(shapiro.test(residuals(interaction_DGEN)))
# Multicollinearity check; centering age keeps the VIFs low
vif_DGEN <- vif(interaction_DGEN)
print(vif_DGEN)
# Largest VIF, reported in the HTML tables later in this script.
# FIX: max_vif_DGEN was referenced in the HTML report section but never
# defined anywhere; define it here from the VIFs just computed.
max_vif_DGEN <- round(max(vif_DGEN), 2)
print(summary(interaction_DGEN))
print(AIC(interaction_DGEN))
|
||||||
|
|
||||||
|
# MODEL 3: Age + Sex + Interaction - EHI (DV: ehi_global_mean)
interaction_domain <- lm(ehi_global_mean ~ age_centered + sex_dummy + age_centered:sex_dummy, data = data)
# Diagnostic panel (residuals vs fitted, QQ, scale-location, leverage)
par(mfrow = c(2, 2))
plot(interaction_domain)
print(shapiro.test(residuals(interaction_domain)))
# Multicollinearity check; centering age keeps the VIFs low
vif_domain <- vif(interaction_domain)
print(vif_domain)
# Largest VIF, reported in the HTML tables later in this script.
# FIX: max_vif_domain was referenced in the HTML report section but never
# defined anywhere; define it here from the VIFs just computed.
max_vif_domain <- round(max(vif_domain), 2)
print(summary(interaction_domain))
print(AIC(interaction_domain))
|
||||||
|
|
||||||
|
#### troubleshooting ####

# Clear any existing plots if the graphics device gets stuck
# dev.off()


#### PLOTS ####

# Create visual figures for the age models
library(ggplot2)
|
||||||
|
|
||||||
|
# Figure 1: age_DGEN model — scatter of EOHI-DGEN vs centered age with the
# fitted OLS line and its confidence band
p1 <- ggplot(data, aes(x = age_centered, y = eohiDGEN_mean)) +
  geom_point(alpha = 0.6, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Age and EOHI-DGEN Relationship",
    x = "Age (centered)",
    y = "EOHI-DGEN Mean",
    # NOTE(review): R² is computed from the fitted model, but "p < 0.001" is
    # hard-coded — confirm the label still holds if the data change.
    subtitle = paste("R² =", round(summary(age_DGEN)$r.squared, 3),
                     ", p < 0.001")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
|
||||||
|
|
||||||
|
# Figure 2: age_domain model — scatter of EHI domain mean vs centered age
# with the fitted OLS line and its confidence band
p2 <- ggplot(data, aes(x = age_centered, y = ehi_global_mean)) +
  geom_point(alpha = 0.6, color = "darkgreen") +
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Age and EHI Domain Relationship",
    x = "Age (centered)",
    y = "EHI Domain Mean",
    # NOTE(review): R² is computed from the fitted model, but "p < 0.001" is
    # hard-coded — confirm the label still holds if the data change.
    subtitle = paste("R² =", round(summary(age_domain)$r.squared, 3),
                     ", p < 0.001")
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12),
    plot.subtitle = element_text(size = 10, color = "gray60")
  )
|
||||||
|
|
||||||
|
# Save the plots (8x6 in at 300 dpi) into the working directory; these
# filenames are embedded in the HTML report generated below
ggsave("age_DGEN_plot.png", p1, width = 8, height = 6, dpi = 300)
ggsave("age_domain_plot.png", p2, width = 8, height = 6, dpi = 300)

# Display the plots
print(p1)
print(p2)
|
||||||
|
|
||||||
|
#### HTML file ####

# Create a comprehensive HTML report grouped by model. Each model section
# embeds its stargazer table (with AIC and max VIF appended as extra rows)
# plus the diagnostic-plot PNGs referenced by filename.
# NOTE(review): the *_assumptions.png files referenced below are never saved
# by this script (plot() draws to the active device only) — confirm they are
# produced elsewhere, or wrap the plot() calls in png()/dev.off().
# NOTE(review): ensure max_vif_DGEN and max_vif_domain (largest VIFs of the
# two interaction models) are defined before this point.
library(htmltools)

# Start HTML document
html_content <- htmltools::div(
  htmltools::h1("Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain"),

  # EOHI-DGEN Models Section
  htmltools::h2("EOHI-DGEN Models"),

  # Age Model
  htmltools::h3("1. Age Model (age_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        age_DGEN,
        type = "html",
        title = "Age Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Age (centered)"),
        report = "vcsp*",  # coefficient, std. error, p-value, stars
        add.lines = list(
          c("AIC", round(AIC(age_DGEN), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "age_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;"),
    htmltools::h4("Model Visualization"),
    htmltools::img(src = "age_DGEN_plot.png", style = "width: 100%; max-width: 800px;")
  ),

  # Sex Model
  htmltools::h3("2. Sex Model (sex_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        sex_DGEN,
        type = "html",
        title = "Sex Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Sex (dummy)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(sex_DGEN), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "sex_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),

  # Interaction Model
  htmltools::h3("3. Interaction Model (interaction_DGEN)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        interaction_DGEN,
        type = "html",
        title = "Interaction Model: EOHI-DGEN",
        dep.var.labels = "EOHI-DGEN Mean",
        covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(interaction_DGEN), 2)),
          c("Max VIF", max_vif_DGEN)
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "interaction_DGEN_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),

  # EHI Domain Models Section
  htmltools::h2("EHI Domain Models"),

  # Age Model
  htmltools::h3("1. Age Model (age_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        age_domain,
        type = "html",
        title = "Age Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Age (centered)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(age_domain), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "age_domain_assumptions.png", style = "width: 100%; max-width: 800px;"),
    htmltools::h4("Model Visualization"),
    htmltools::img(src = "age_domain_plot.png", style = "width: 100%; max-width: 800px;")
  ),

  # Sex Model
  htmltools::h3("2. Sex Model (sex_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        sex_domain,
        type = "html",
        title = "Sex Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Sex (dummy)"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(sex_domain), 2)),
          c("Max VIF", "N/A")  # single predictor: VIF not applicable
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "sex_domain_assumptions.png", style = "width: 100%; max-width: 800px;")
  ),

  # Interaction Model
  htmltools::h3("3. Interaction Model (interaction_domain)"),
  htmltools::div(
    style = "margin-bottom: 30px;",
    htmltools::h4("Model Results, AIC, and VIF"),
    htmltools::HTML(
      stargazer(
        interaction_domain,
        type = "html",
        title = "Interaction Model: EHI Domain",
        dep.var.labels = "EHI Domain Mean",
        covariate.labels = c("Age (centered)", "Sex (dummy)", "Age x Sex"),
        report = "vcsp*",
        add.lines = list(
          c("AIC", round(AIC(interaction_domain), 2)),
          c("Max VIF", max_vif_domain)
        )
      )
    ),
    htmltools::h4("Assumption Diagnostic Plots"),
    htmltools::img(src = "interaction_domain_assumptions.png", style = "width: 100%; max-width: 800px;")
  )
)

# Save HTML content
htmltools::save_html(html_content, "regression_analysis_report.html")

print("HTML report created: regression_analysis_report.html")
|
||||||
31
eohi1/regression_DGEN_models.html
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
<table style="text-align:center"><caption><strong>Regression Models: EOHI-DGEN</strong></caption>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
|
||||||
|
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td colspan="3">EOHI-DGEN Mean</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td><td></td><td>-0.018</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.003)</td><td></td><td>(0.005)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td><td></td><td>p = 0.0004<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.099</td><td>0.103</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>(0.107)</td><td>(0.106)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>p = 0.355</td><td>p = 0.331</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>0.006</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.402</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Constant</td><td>0.417</td><td>0.367</td><td>0.364</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.053)</td><td>(0.076)</td><td>(0.075)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td><td>4192.48</td><td>4176.42</td></tr>
|
||||||
|
<tr><td style="text-align:left">Max VIF</td><td>N/A</td><td>N/A</td><td>2.22</td></tr>
|
||||||
|
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
|
||||||
|
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td><td>0.001</td><td>0.020</td></tr>
|
||||||
|
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td><td>-0.0001</td><td>0.017</td></tr>
|
||||||
|
<tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td><td>1.742 (df = 1059)</td><td>1.727 (df = 1057)</td></tr>
|
||||||
|
<tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td><td>0.859 (df = 1; 1059)</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
|
||||||
|
</table>
|
||||||
31
eohi1/regression_EHI_domain_models.html
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
<table style="text-align:center"><caption><strong>Regression Models: EHI Domain</strong></caption>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
|
||||||
|
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td colspan="3">EHI Domain Mean</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.007</td><td></td><td>-0.006</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.001)</td><td></td><td>(0.001)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td></td><td>p = 0.00001<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.031</td><td>0.033</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>(0.031)</td><td>(0.030)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>p = 0.311</td><td>p = 0.274</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>-0.0003</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.002)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.883</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Constant</td><td>0.137</td><td>0.121</td><td>0.120</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.015)</td><td>(0.022)</td><td>(0.021)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00000<sup>***</sup></td><td>p = 0.00000<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1515.75</td><td>1562.88</td><td>1518.52</td></tr>
|
||||||
|
<tr><td style="text-align:left">Max VIF</td><td>NA</td><td>NA</td><td>2.22</td></tr>
|
||||||
|
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
|
||||||
|
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.044</td><td>0.001</td><td>0.045</td></tr>
|
||||||
|
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td><td>0.00003</td><td>0.043</td></tr>
|
||||||
|
<tr><td style="text-align:left">Residual Std. Error</td><td>0.493 (df = 1059)</td><td>0.504 (df = 1059)</td><td>0.494 (df = 1057)</td></tr>
|
||||||
|
<tr><td style="text-align:left">F Statistic</td><td>49.178<sup>***</sup> (df = 1; 1059)</td><td>1.029 (df = 1; 1059)</td><td>16.789<sup>***</sup> (df = 3; 1057)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
|
||||||
|
</table>
|
||||||
31
eohi1/regression_EOHI_models.html
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
|
||||||
|
<table style="text-align:center"><caption><strong>Regression Models: EOHI-DGEN</strong></caption>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="3"><em>Dependent variable:</em></td></tr>
|
||||||
|
<tr><td></td><td colspan="3" style="border-bottom: 1px solid black"></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td colspan="3">EOHI-DGEN Mean</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td><td>(3)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td><td></td><td>-0.018</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.003)</td><td></td><td>(0.005)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td><td></td><td>p = 0.0004<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Sex (dummy)</td><td></td><td>0.099</td><td>0.103</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>(0.107)</td><td>(0.106)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td>p = 0.355</td><td>p = 0.331</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Age x Sex</td><td></td><td></td><td>0.006</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>(0.007)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td>p = 0.402</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td style="text-align:left">Constant</td><td>0.417</td><td>0.367</td><td>0.364</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>(0.053)</td><td>(0.076)</td><td>(0.075)</td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td><td>p = 0.00001<sup>***</sup></td></tr>
|
||||||
|
<tr><td style="text-align:left"></td><td></td><td></td><td></td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td><td>4192.48</td><td>4176.42</td></tr>
|
||||||
|
<tr><td style="text-align:left">Max VIF</td><td>N/A</td><td>N/A</td><td>2.22</td></tr>
|
||||||
|
<tr><td style="text-align:left">Observations</td><td>1,061</td><td>1,061</td><td>1,061</td></tr>
|
||||||
|
<tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td><td>0.001</td><td>0.020</td></tr>
|
||||||
|
<tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td><td>-0.0001</td><td>0.017</td></tr>
|
||||||
|
<tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td><td>1.742 (df = 1059)</td><td>1.727 (df = 1057)</td></tr>
|
||||||
|
<tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td><td>0.859 (df = 1; 1059)</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr>
|
||||||
|
<tr><td colspan="4" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="3" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
|
||||||
|
</table>
|
||||||
62
eohi1/regression_analysis_report.html
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8"/>
|
||||||
|
<style>body{background-color:white;}</style>
|
||||||
|
|
||||||
|
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<h1>Regression Analysis: Age and Sex Effects on EOHI-DGEN and EHI Domain</h1>
|
||||||
|
<h2>EOHI-DGEN Models</h2>
|
||||||
|
<h3>1. Age Model (age_DGEN)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Age Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.015</td></tr> <tr><td style="text-align:left"></td><td>(0.003)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00002<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.417</td></tr> <tr><td style="text-align:left"></td><td>(0.053)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4174.08</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.018</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>1.727 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>19.407<sup>***</sup> (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="age_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
<h4>Model Visualization</h4>
|
||||||
|
<img src="age_DGEN_plot.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h3>2. Sex Model (sex_DGEN)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Sex Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Sex (dummy)</td><td>0.099</td></tr> <tr><td style="text-align:left"></td><td>(0.107)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.355</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.367</td></tr> <tr><td style="text-align:left"></td><td>(0.076)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4192.48</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.001</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>-0.0001</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>1.742 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>0.859 (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="sex_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h3>3. Interaction Model (interaction_DGEN)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Interaction Model: EOHI-DGEN</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EOHI-DGEN Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.018</td></tr> <tr><td style="text-align:left"></td><td>(0.005)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.0004<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Sex (dummy)</td><td>0.103</td></tr> <tr><td style="text-align:left"></td><td>(0.106)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.331</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Age x Sex</td><td>0.006</td></tr> <tr><td style="text-align:left"></td><td>(0.007)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.402</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.364</td></tr> <tr><td style="text-align:left"></td><td>(0.075)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>4176.42</td></tr> <tr><td style="text-align:left">Max VIF</td><td>2.22</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.020</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.017</td></tr> <tr><td style="text-align:left">Residual Std. 
Error</td><td>1.727 (df = 1057)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>7.017<sup>***</sup> (df = 3; 1057)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="interaction_DGEN_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h2>EHI Domain Models</h2>
|
||||||
|
<h3>1. Age Model (age_domain)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Age Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.007</td></tr> <tr><td style="text-align:left"></td><td>(0.001)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.137</td></tr> <tr><td style="text-align:left"></td><td>(0.015)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1515.75</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.044</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>0.493 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>49.178<sup>***</sup> (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="age_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
<h4>Model Visualization</h4>
|
||||||
|
<img src="age_domain_plot.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h3>2. Sex Model (sex_domain)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Sex Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Sex (dummy)</td><td>0.031</td></tr> <tr><td style="text-align:left"></td><td>(0.031)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.311</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.121</td></tr> <tr><td style="text-align:left"></td><td>(0.022)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1562.88</td></tr> <tr><td style="text-align:left">Max VIF</td><td>N/A</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.001</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.00003</td></tr> <tr><td style="text-align:left">Residual Std. Error</td><td>0.504 (df = 1059)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>1.029 (df = 1; 1059)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="sex_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
<h3>3. Interaction Model (interaction_domain)</h3>
|
||||||
|
<div style="margin-bottom: 30px;">
|
||||||
|
<h4>Model Results, AIC, and VIF</h4>
|
||||||
|
<table style="text-align:center"><caption><strong>Interaction Model: EHI Domain</strong></caption> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td><em>Dependent variable:</em></td></tr> <tr><td></td><td colspan="1" style="border-bottom: 1px solid black"></td></tr> <tr><td style="text-align:left"></td><td>EHI Domain Mean</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Age (centered)</td><td>-0.006</td></tr> <tr><td style="text-align:left"></td><td>(0.001)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00001<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Sex (dummy)</td><td>0.033</td></tr> <tr><td style="text-align:left"></td><td>(0.030)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.274</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Age x Sex</td><td>-0.0003</td></tr> <tr><td style="text-align:left"></td><td>(0.002)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.883</td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td style="text-align:left">Constant</td><td>0.120</td></tr> <tr><td style="text-align:left"></td><td>(0.021)</td></tr> <tr><td style="text-align:left"></td><td>p = 0.00000<sup>***</sup></td></tr> <tr><td style="text-align:left"></td><td></td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">AIC</td><td>1518.52</td></tr> <tr><td style="text-align:left">Max VIF</td><td>2.22</td></tr> <tr><td style="text-align:left">Observations</td><td>1,061</td></tr> <tr><td style="text-align:left">R<sup>2</sup></td><td>0.045</td></tr> <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.043</td></tr> <tr><td style="text-align:left">Residual Std. 
Error</td><td>0.494 (df = 1057)</td></tr> <tr><td style="text-align:left">F Statistic</td><td>16.789<sup>***</sup> (df = 3; 1057)</td></tr> <tr><td colspan="2" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr> </table>
|
||||||
|
<h4>Assumption Diagnostic Plots</h4>
|
||||||
|
<img src="interaction_domain_assumptions.png" style="width: 100%; max-width: 800px;"/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
204
eohi1/reliability_analysis_cronbach_alpha.r
Normal file
@ -0,0 +1,204 @@
|
|||||||
|
# Cronbach's Alpha Reliability Analysis
# For 4 domains (preferences, personality, values, life satisfaction) at 2 time points
# 5 items per domain per time point

# Load required libraries
library(psych)
library(dplyr)
library(tidyr)

# Read the data
data <- read.csv("exp1.csv")

# Define the scale variables for each domain and time point.
# Item names follow the pattern N<Time>Diff_<domain>_<item>.

# Past time point scales
past_pref_vars <- c(
  "NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
  "NPastDiff_pref_nap", "NPastDiff_pref_travel"
)

past_pers_vars <- c(
  "NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
  "NPastDiff_pers_anxious", "NPastDiff_pers_complex"
)

past_val_vars <- c(
  "NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
  "NPastDiff_val_performance", "NPastDiff_val_justice"
)

past_life_vars <- c(
  "NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
  "NPastDiff_life_important", "NPastDiff_life_change"
)

# Future time point scales
fut_pref_vars <- c(
  "NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
  "NFutDiff_pref_nap", "NFutDiff_pref_travel"
)

fut_pers_vars <- c(
  "NFutDiff_pers_extravert", "NFutDiff_pers_critical", "NFutDiff_pers_dependable",
  "NFutDiff_pers_anxious", "NFutDiff_pers_complex"
)

fut_val_vars <- c(
  "NFutDiff_val_obey", "NFutDiff_val_trad", "NFutDiff_val_opinion",
  "NFutDiff_val_performance", "NFutDiff_val_justice"
)

fut_life_vars <- c(
  "NFutDiff_life_ideal", "NFutDiff_life_excellent", "NFutDiff_life_satisfied",
  "NFutDiff_life_important", "NFutDiff_life_change"
)
|
||||||
|
|
||||||
|
# Function to calculate Cronbach's alpha and return detailed results
|
||||||
|
# Compute Cronbach's alpha for one scale and print a detailed report.
#
# Args:
#   data:       data frame containing the item columns.
#   var_names:  character vector of column names that make up the scale.
#   scale_name: label used in the printed report.
#
# Returns the psych::alpha() result object, or NULL when fewer than
# 3 complete cases are available.
calc_cronbach_alpha <- function(data, var_names, scale_name) {
  # Subset the items and summarise missingness per item.
  scale_data <- data[, var_names]
  missing_info <- data.frame(
    Variable = var_names,
    Missing_Count = colSums(is.na(scale_data)),
    Missing_Percent = round(colSums(is.na(scale_data)) / nrow(scale_data) * 100, 2)
  )

  # Listwise deletion: reliability is computed on complete cases only.
  complete_data <- scale_data[complete.cases(scale_data), ]

  # BUG FIX: the original used '"="*60', which is invalid in R (there is
  # no string-repetition operator; it raises "non-numeric argument to
  # binary operator"). Build the separator with strrep() instead.
  sep <- strrep("=", 60)
  cat("\n", sep, "\n")
  cat("SCALE:", scale_name, "\n")
  cat(sep, "\n")

  cat("Sample size for reliability analysis:", nrow(complete_data), "\n")
  cat("Original sample size:", nrow(data), "\n")
  cat("Cases removed due to missing data:", nrow(data) - nrow(complete_data), "\n\n")

  cat("Missing data summary:\n")
  print(missing_info)

  # Too few complete cases: report and bail out rather than erroring inside alpha().
  if (nrow(complete_data) < 3) {
    cat("\nWARNING: Insufficient complete cases for reliability analysis\n")
    return(NULL)
  }

  # psych::alpha; check.keys = TRUE auto-reverses negatively keyed items.
  alpha_result <- alpha(complete_data, check.keys = TRUE)

  cat("\nCronbach's Alpha Results:\n")
  cat("Raw alpha:", round(alpha_result$total$raw_alpha, 4), "\n")
  cat("Standardized alpha:", round(alpha_result$total$std.alpha, 4), "\n")
  cat("Average inter-item correlation:", round(alpha_result$total$average_r, 4), "\n")
  cat("Number of items:", alpha_result$total$nvar, "\n")

  # Per-item diagnostics (alpha-if-deleted, corrected item-total r, mean, SD).
  cat("\nItem Statistics:\n")
  item_stats <- data.frame(
    Item = var_names,
    Alpha_if_deleted = round(alpha_result$alpha.drop$raw_alpha, 4),
    Item_total_correlation = round(alpha_result$item.stats$r.drop, 4),
    Mean = round(alpha_result$item.stats$mean, 4),
    SD = round(alpha_result$item.stats$sd, 4)
  )
  print(item_stats)

  # Check assumptions
  cat("\nAssumption Checks:\n")

  # 1. Sufficient sample size (minimum 30 recommended).
  sample_size_ok <- nrow(complete_data) >= 30
  cat("Sample size adequate (≥30):", sample_size_ok, "\n")

  # 2. Adequate inter-item correlations (> 0.30); keep upper triangle only,
  #    diagonal excluded, so each pair is counted once.
  inter_item_cors <- cor(complete_data)
  inter_item_cors[lower.tri(inter_item_cors)] <- NA
  diag(inter_item_cors) <- NA
  inter_item_cors_flat <- as.vector(inter_item_cors)
  inter_item_cors_flat <- inter_item_cors_flat[!is.na(inter_item_cors_flat)]
  adequate_cors <- sum(inter_item_cors_flat > 0.30) / length(inter_item_cors_flat)
  cat("Proportion of inter-item correlations > 0.30:", round(adequate_cors, 4), "\n")

  # 3. Negative correlations (concerning for unidimensionality).
  negative_cors <- sum(inter_item_cors_flat < 0, na.rm = TRUE)
  cat("Number of negative inter-item correlations:", negative_cors, "\n")

  # 4. Item variances should be roughly similar.
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat("Ratio of highest to lowest item variance:", round(var_ratio, 4), "\n")

  return(alpha_result)
}
|
||||||
|
|
||||||
|
# Calculate Cronbach's alpha for all scales and summarise the results.

# Extract the rounded raw alpha from an alpha() result, or NA when the
# analysis could not be run (insufficient complete cases).
alpha_or_na <- function(alpha_result) {
  if (is.null(alpha_result)) NA_real_ else round(alpha_result$total$raw_alpha, 4)
}

# Map a raw alpha coefficient to the conventional qualitative label.
# Replaces eight copy-pasted if/else chains in the original.
interpret_alpha <- function(alpha_result) {
  if (is.null(alpha_result)) {
    return("Insufficient data")
  }
  alpha_val <- alpha_result$total$raw_alpha
  if (alpha_val >= 0.90) {
    "Excellent"
  } else if (alpha_val >= 0.80) {
    "Good"
  } else if (alpha_val >= 0.70) {
    "Acceptable"
  } else if (alpha_val >= 0.60) {
    "Questionable"
  } else {
    "Poor"
  }
}

# BUG FIX: the original used '"="*80', which is invalid R (no string
# repetition operator); use strrep() for the banner separator.
sep80 <- strrep("=", 80)

cat("CRONBACH'S ALPHA RELIABILITY ANALYSIS")
cat("\nData: exp1.csv")
cat("\nTotal sample size:", nrow(data))

# Past time point analyses
past_pref_alpha <- calc_cronbach_alpha(data, past_pref_vars, "Past Preferences")
past_pers_alpha <- calc_cronbach_alpha(data, past_pers_vars, "Past Personality")
past_val_alpha <- calc_cronbach_alpha(data, past_val_vars, "Past Values")
past_life_alpha <- calc_cronbach_alpha(data, past_life_vars, "Past Life Satisfaction")

# Future time point analyses
fut_pref_alpha <- calc_cronbach_alpha(data, fut_pref_vars, "Future Preferences")
fut_pers_alpha <- calc_cronbach_alpha(data, fut_pers_vars, "Future Personality")
fut_val_alpha <- calc_cronbach_alpha(data, fut_val_vars, "Future Values")
fut_life_alpha <- calc_cronbach_alpha(data, fut_life_vars, "Future Life Satisfaction")

# Summary table
cat("\n", sep80, "\n")
cat("SUMMARY OF CRONBACH'S ALPHA COEFFICIENTS")
cat("\n", sep80, "\n")

# Named list keeps the scale label next to its result object.
scale_results <- list(
  "Past Preferences" = past_pref_alpha,
  "Past Personality" = past_pers_alpha,
  "Past Values" = past_val_alpha,
  "Past Life Satisfaction" = past_life_alpha,
  "Future Preferences" = fut_pref_alpha,
  "Future Personality" = fut_pers_alpha,
  "Future Values" = fut_val_alpha,
  "Future Life Satisfaction" = fut_life_alpha
)

summary_results <- data.frame(
  Scale = names(scale_results),
  Alpha = vapply(scale_results, alpha_or_na, numeric(1)),
  Items = rep(5, length(scale_results)),  # every scale has 5 items
  Interpretation = vapply(scale_results, interpret_alpha, character(1)),
  row.names = NULL
)

print(summary_results)

# Save results to CSV
write.csv(summary_results, "cronbach_alpha_summary.csv", row.names = FALSE)
cat("\nSummary results saved to: cronbach_alpha_summary.csv\n")

cat("\n", sep80, "\n")
cat("INTERPRETATION GUIDE FOR CRONBACH'S ALPHA")
cat("\n", sep80, "\n")
cat("α ≥ 0.90: Excellent reliability\n")
cat("α ≥ 0.80: Good reliability\n")
cat("α ≥ 0.70: Acceptable reliability\n")
cat("α ≥ 0.60: Questionable reliability\n")
cat("α < 0.60: Poor reliability\n")
|
||||||
|
|
||||||
|
|
||||||
BIN
eohi1/residual_plots.pdf
Normal file
BIN
eohi1/results - exp1 - mixed anova DGEN.md
Normal file
BIN
eohi1/results exp1 - mixed anova - domain means.md
Normal file
BIN
eohi1/sex_DGEN_assumptions.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
eohi1/sex_domain_assumptions.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
14
eohi1/spearman_correlations.csv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
"","eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean","ehi_pref_mean","ehi_pers_mean","ehi_val_mean","ehi_life_mean","ehi_global_mean","AOT_total","CRT_correct","CRT_int"
|
||||||
|
"eohiDGEN_pref",1,0.27683,0.23964,0.24515,0.66949,0.22538,0.09704,0.08007,0.12507,0.21415,0.06836,0.05361,-0.05819
|
||||||
|
"eohiDGEN_pers",0.27683,1,0.31901,0.18576,0.70868,0.21513,0.18684,0.13159,0.12199,0.26738,0.0916,0.02566,-0.01732
|
||||||
|
"eohiDGEN_val",0.23964,0.31901,1,0.13719,0.68261,0.14816,0.12424,0.23448,0.06907,0.21756,0.10396,0.07479,-0.06044
|
||||||
|
"eohiDGEN_life",0.24515,0.18576,0.13719,1,0.25499,0.02053,0.06528,0.04652,0.29721,0.23854,0.11795,0.03949,-0.0013
|
||||||
|
"eohiDGEN_mean",0.66949,0.70868,0.68261,0.25499,1,0.26963,0.17831,0.19688,0.1283,0.31052,0.11102,0.06665,-0.05731
|
||||||
|
"ehi_pref_mean",0.22538,0.21513,0.14816,0.02053,0.26963,1,0.23765,0.19925,0.0625,0.51305,0.0504,0.06892,-0.06473
|
||||||
|
"ehi_pers_mean",0.09704,0.18684,0.12424,0.06528,0.17831,0.23765,1,0.22369,0.12488,0.5826,0.03132,0.05133,-0.04906
|
||||||
|
"ehi_val_mean",0.08007,0.13159,0.23448,0.04652,0.19688,0.19925,0.22369,1,0.02752,0.51294,0.01519,0.00599,-0.03791
|
||||||
|
"ehi_life_mean",0.12507,0.12199,0.06907,0.29721,0.1283,0.0625,0.12488,0.02752,1,0.59399,0.07295,0.01639,-0.00581
|
||||||
|
"ehi_global_mean",0.21415,0.26738,0.21756,0.23854,0.31052,0.51305,0.5826,0.51294,0.59399,1,0.0849,0.05133,-0.05746
|
||||||
|
"AOT_total",0.06836,0.0916,0.10396,0.11795,0.11102,0.0504,0.03132,0.01519,0.07295,0.0849,1,0.2609,-0.20963
|
||||||
|
"CRT_correct",0.05361,0.02566,0.07479,0.03949,0.06665,0.06892,0.05133,0.00599,0.01639,0.05133,0.2609,1,-0.87087
|
||||||
|
"CRT_int",-0.05819,-0.01732,-0.06044,-0.0013,-0.05731,-0.06473,-0.04906,-0.03791,-0.00581,-0.05746,-0.20963,-0.87087,1
|
||||||
|
145
eohi2/EHI reliability.html
Normal file
@ -0,0 +1,145 @@
|
|||||||
|
<html><head><title>EHI Reliability Analysis</title></head><body><h1>EHI Reliability Analysis</h1><h2>Cronbach's Alpha</h2><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> raw_alpha </th>
|
||||||
|
<th style="text-align:right;"> std.alpha </th>
|
||||||
|
<th style="text-align:right;"> G6(smc) </th>
|
||||||
|
<th style="text-align:right;"> average_r </th>
|
||||||
|
<th style="text-align:right;"> S/N </th>
|
||||||
|
<th style="text-align:right;"> ase </th>
|
||||||
|
<th style="text-align:right;"> mean </th>
|
||||||
|
<th style="text-align:right;"> sd </th>
|
||||||
|
<th style="text-align:right;"> median_r </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> </td>
|
||||||
|
<td style="text-align:right;"> 0.5709145 </td>
|
||||||
|
<td style="text-align:right;"> 0.6860723 </td>
|
||||||
|
<td style="text-align:right;"> 0.6450534 </td>
|
||||||
|
<td style="text-align:right;"> 0.3533207 </td>
|
||||||
|
<td style="text-align:right;"> 2.185446 </td>
|
||||||
|
<td style="text-align:right;"> 0.0229978 </td>
|
||||||
|
<td style="text-align:right;"> 0.138301 </td>
|
||||||
|
<td style="text-align:right;"> 0.8228433 </td>
|
||||||
|
<td style="text-align:right;"> 0.3517941 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table><h2>Split-Half Reliability</h2><p>Maximum split half reliability: 0.77987</p><h2>Item-Level Statistics</h2><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> n </th>
|
||||||
|
<th style="text-align:right;"> raw.r </th>
|
||||||
|
<th style="text-align:right;"> std.r </th>
|
||||||
|
<th style="text-align:right;"> r.cor </th>
|
||||||
|
<th style="text-align:right;"> r.drop </th>
|
||||||
|
<th style="text-align:right;"> mean </th>
|
||||||
|
<th style="text-align:right;"> sd </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehiDGEN_5_mean </td>
|
||||||
|
<td style="text-align:right;"> 489 </td>
|
||||||
|
<td style="text-align:right;"> 0.7653498 </td>
|
||||||
|
<td style="text-align:right;"> 0.6789150 </td>
|
||||||
|
<td style="text-align:right;"> 0.5035473 </td>
|
||||||
|
<td style="text-align:right;"> 0.4480613 </td>
|
||||||
|
<td style="text-align:right;"> 0.0620314 </td>
|
||||||
|
<td style="text-align:right;"> 1.4573526 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehiDGEN_10_mean </td>
|
||||||
|
<td style="text-align:right;"> 489 </td>
|
||||||
|
<td style="text-align:right;"> 0.8686359 </td>
|
||||||
|
<td style="text-align:right;"> 0.7381306 </td>
|
||||||
|
<td style="text-align:right;"> 0.6097580 </td>
|
||||||
|
<td style="text-align:right;"> 0.5098844 </td>
|
||||||
|
<td style="text-align:right;"> 0.3006135 </td>
|
||||||
|
<td style="text-align:right;"> 1.8924455 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi5_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 489 </td>
|
||||||
|
<td style="text-align:right;"> 0.5032176 </td>
|
||||||
|
<td style="text-align:right;"> 0.7114339 </td>
|
||||||
|
<td style="text-align:right;"> 0.5651807 </td>
|
||||||
|
<td style="text-align:right;"> 0.3871421 </td>
|
||||||
|
<td style="text-align:right;"> 0.0675187 </td>
|
||||||
|
<td style="text-align:right;"> 0.4620107 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi10_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 489 </td>
|
||||||
|
<td style="text-align:right;"> 0.5705196 </td>
|
||||||
|
<td style="text-align:right;"> 0.7420342 </td>
|
||||||
|
<td style="text-align:right;"> 0.6269733 </td>
|
||||||
|
<td style="text-align:right;"> 0.4474763 </td>
|
||||||
|
<td style="text-align:right;"> 0.1230402 </td>
|
||||||
|
<td style="text-align:right;"> 0.5252241 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table><h3>Alpha if Item Dropped</h3><table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th style="text-align:left;"> </th>
|
||||||
|
<th style="text-align:right;"> raw_alpha </th>
|
||||||
|
<th style="text-align:right;"> std.alpha </th>
|
||||||
|
<th style="text-align:right;"> G6(smc) </th>
|
||||||
|
<th style="text-align:right;"> average_r </th>
|
||||||
|
<th style="text-align:right;"> S/N </th>
|
||||||
|
<th style="text-align:right;"> alpha se </th>
|
||||||
|
<th style="text-align:right;"> var.r </th>
|
||||||
|
<th style="text-align:right;"> med.r </th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehiDGEN_5_mean </td>
|
||||||
|
<td style="text-align:right;"> 0.4125021 </td>
|
||||||
|
<td style="text-align:right;"> 0.6576471 </td>
|
||||||
|
<td style="text-align:right;"> 0.5754825 </td>
|
||||||
|
<td style="text-align:right;"> 0.3903632 </td>
|
||||||
|
<td style="text-align:right;"> 1.920963 </td>
|
||||||
|
<td style="text-align:right;"> 0.0273016 </td>
|
||||||
|
<td style="text-align:right;"> 0.0105384 </td>
|
||||||
|
<td style="text-align:right;"> 0.4162485 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehiDGEN_10_mean </td>
|
||||||
|
<td style="text-align:right;"> 0.4091906 </td>
|
||||||
|
<td style="text-align:right;"> 0.6003996 </td>
|
||||||
|
<td style="text-align:right;"> 0.5190903 </td>
|
||||||
|
<td style="text-align:right;"> 0.3337035 </td>
|
||||||
|
<td style="text-align:right;"> 1.502500 </td>
|
||||||
|
<td style="text-align:right;"> 0.0345773 </td>
|
||||||
|
<td style="text-align:right;"> 0.0161841 </td>
|
||||||
|
<td style="text-align:right;"> 0.2873396 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi5_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 0.5572211 </td>
|
||||||
|
<td style="text-align:right;"> 0.6271436 </td>
|
||||||
|
<td style="text-align:right;"> 0.5440163 </td>
|
||||||
|
<td style="text-align:right;"> 0.3592479 </td>
|
||||||
|
<td style="text-align:right;"> 1.681998 </td>
|
||||||
|
<td style="text-align:right;"> 0.0259296 </td>
|
||||||
|
<td style="text-align:right;"> 0.0113815 </td>
|
||||||
|
<td style="text-align:right;"> 0.4162485 </td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td style="text-align:left;"> ehi10_global_mean </td>
|
||||||
|
<td style="text-align:right;"> 0.5282880 </td>
|
||||||
|
<td style="text-align:right;"> 0.5963510 </td>
|
||||||
|
<td style="text-align:right;"> 0.5041975 </td>
|
||||||
|
<td style="text-align:right;"> 0.3299683 </td>
|
||||||
|
<td style="text-align:right;"> 1.477400 </td>
|
||||||
|
<td style="text-align:right;"> 0.0271341 </td>
|
||||||
|
<td style="text-align:right;"> 0.0068450 </td>
|
||||||
|
<td style="text-align:right;"> 0.2873396 </td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table></body></html>
|
||||||
1047
eohi2/README_Variable_Creation.txt
Normal file
0
eohi2/RMD - mixed anova DGEN.rmd
Normal file
BIN
eohi2/Rplots.pdf
Normal file
7
eohi2/STD_EHI_correlation_matrix.csv
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
,stdEHI_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
|
||||||
|
sex_dummy,0.079,,,,,
|
||||||
|
demo_age_1,-0.22***,-0.028,,,,
|
||||||
|
edu_num,-0.027,0.0094,-0.078,,,
|
||||||
|
aot_total,-0.028,-0.072,-0.20***,0.10*,,
|
||||||
|
crt_correct,0.11*,-0.16***,-0.0044,0.12**,-0.037,
|
||||||
|
crt_int,-0.12**,0.14**,0.028,-0.11*,-0.011,-0.88***
|
||||||
|
7
eohi2/STD_EHI_correlation_pvalues.csv
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
,stdEHI_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
|
||||||
|
sex_dummy,0.080650819,,,,,
|
||||||
|
demo_age_1,1.40537E-06,0.536013997,,,,
|
||||||
|
edu_num,0.551130545,0.835590798,0.086262901,,,
|
||||||
|
aot_total,0.545115793,0.112852544,5.87906E-06,0.025659604,,
|
||||||
|
crt_correct,0.011928378,0.000483116,0.923101803,0.00884239,0.4122211,
|
||||||
|
crt_int,0.007552737,0.001816488,0.533287266,0.012949289,0.810348767,9.5293E-160
|
||||||
|
100
eohi2/correlation matrix 2 - std ehi.r
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

df <- read.csv("eohi2.csv")

# Keep only the analysis variables and drop respondents who declined to
# report sex.
data <- df %>%
  select(stdEHI_mean, demo_sex, demo_age_1, edu3, aot_total, crt_correct, crt_int) %>%
  filter(demo_sex != "Prefer not to say")

print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)

# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

# ---- Descriptives ----

# Age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))

# Sex: frequency and proportion tables
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))

# Sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to an ordered factor, then to numeric codes (1, 2, 3)
# for use in the correlation matrix.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)

# Check and verify the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### correlation matrix ####

# Numeric variables entering the correlation matrix
numeric_vars <- data %>%
  select(stdEHI_mean, sex_dummy, demo_age_1, edu_num, aot_total, crt_correct, crt_int)

# Spearman correlations on complete observations
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Significance tests via psych::corr.test (no p-value adjustment)
library(psych)

cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")

print(round(cor_test$r, 3))
print(round(cor_test$p, 3))

# Report every off-diagonal pair with its r and p value (for reporting)
for (i in seq_len(nrow(cor_test$r))) {
  for (j in seq_len(ncol(cor_test$r))) {
    if (i != j) { # Skip diagonal
      cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
          ": r =", round(cor_test$r[i, j], 3),
          ", p =", round(cor_test$p[i, j], 3), "\n")
    }
  }
}

# Summary of significant correlations (p < .05, diagonal excluded)
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (k in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[k, 1]
    col_idx <- sig_cors[k, 2]
    if (row_idx != col_idx) { # Skip diagonal
      cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
          ": r =", round(cor_test$r[row_idx, col_idx], 3),
          ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
    }
  }
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "STD_EHI_correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "STD_EHI_correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to STD_EHI_correlation_matrix.csv")
print("P-values saved to STD_EHI_correlation_pvalues.csv")
|
||||||
63
eohi2/correlationCORRECT_exp2.csv
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
Variable1,Variable2,Spearman_r,P_value
|
||||||
|
ehiDGEN_5_Pers,aot_total,-0.136252077,0.002533005
|
||||||
|
ehi5_global_mean,crt_int,-0.111415172,0.013696124
|
||||||
|
ehi5_pref_MEAN,crt_int,-0.111269786,0.013820353
|
||||||
|
ehi5_pref_MEAN,crt_correct,0.092043471,0.041901166
|
||||||
|
ehi5_global_mean,crt_correct,0.091771139,0.042513801
|
||||||
|
,,,
|
||||||
|
ehi10_global_mean,crt_correct,0.086125328,0.057016745
|
||||||
|
ehi10_global_mean,crt_int,-0.077752437,0.085875618
|
||||||
|
ehi10_global_mean,aot_total,0.038353442,0.397405642
|
||||||
|
ehi10_pers_MEAN,crt_int,-0.078358781,0.083451324
|
||||||
|
ehi10_pers_MEAN,crt_correct,0.062086308,0.170454491
|
||||||
|
ehi10_pers_MEAN,aot_total,0.008627548,0.849074663
|
||||||
|
ehi10_pref_MEAN,crt_correct,0.076608495,0.090603647
|
||||||
|
ehi10_pref_MEAN,crt_int,-0.020948272,0.64400852
|
||||||
|
ehi10_pref_MEAN,aot_total,-0.006633556,0.883671698
|
||||||
|
ehi10_val_MEAN,crt_correct,0.05989196,0.186098498
|
||||||
|
ehi10_val_MEAN,crt_int,-0.057978747,0.200580439
|
||||||
|
ehi10_val_MEAN,aot_total,-0.003835901,0.932573186
|
||||||
|
ehi5.10_global_mean,crt_correct,0.033565462,0.458964083
|
||||||
|
ehi5.10_global_mean,aot_total,-0.031442998,0.487869173
|
||||||
|
ehi5.10_global_mean,crt_int,-0.023334683,0.606722562
|
||||||
|
ehi5.10_pers_MEAN,crt_correct,0.030973863,0.494387766
|
||||||
|
ehi5.10_pers_MEAN,crt_int,-0.02351475,0.603948546
|
||||||
|
ehi5.10_pers_MEAN,aot_total,-0.006387901,0.887950695
|
||||||
|
ehi5.10_pref_MEAN,aot_total,-0.012595122,0.781151691
|
||||||
|
ehi5.10_pref_MEAN,crt_correct,-0.011596204,0.798118255
|
||||||
|
ehi5.10_pref_MEAN,crt_int,0.00927269,0.837939503
|
||||||
|
ehi5.10_val_MEAN,crt_int,-0.02343742,0.60513915
|
||||||
|
ehi5.10_val_MEAN,aot_total,-0.01868822,0.680165418
|
||||||
|
ehi5.10_val_MEAN,crt_correct,0.016882557,0.709594395
|
||||||
|
ehi5_global_mean,aot_total,-0.012144664,0.788790373
|
||||||
|
ehi5_pers_MEAN,crt_correct,0.041879574,0.35541717
|
||||||
|
ehi5_pers_MEAN,crt_int,-0.038984568,0.38967884
|
||||||
|
ehi5_pers_MEAN,aot_total,-0.002320273,0.959183882
|
||||||
|
ehi5_pref_MEAN,aot_total,-0.009156402,0.839944313
|
||||||
|
ehi5_val_MEAN,crt_int,-0.060691086,0.180283214
|
||||||
|
ehi5_val_MEAN,aot_total,-0.043640227,0.335537257
|
||||||
|
ehi5_val_MEAN,crt_correct,0.038349744,0.397451186
|
||||||
|
ehiDGEN_10_mean,crt_int,-0.060692684,0.180271722
|
||||||
|
ehiDGEN_10_mean,crt_correct,0.048693261,0.282530257
|
||||||
|
ehiDGEN_10_mean,aot_total,-0.007003919,0.877226879
|
||||||
|
ehiDGEN_10_Pers,crt_int,-0.08321639,0.065963243
|
||||||
|
ehiDGEN_10_Pers,crt_correct,0.062811394,0.165507368
|
||||||
|
ehiDGEN_10_Pers,aot_total,-0.005213733,0.908446087
|
||||||
|
ehiDGEN_10_Pref,crt_correct,0.056920022,0.208938511
|
||||||
|
ehiDGEN_10_Pref,crt_int,-0.054154647,0.231949195
|
||||||
|
ehiDGEN_10_Pref,aot_total,0.012229383,0.787352177
|
||||||
|
ehiDGEN_10_Val,crt_correct,-0.046875479,0.300907385
|
||||||
|
ehiDGEN_10_Val,aot_total,-0.041722577,0.35722513
|
||||||
|
ehiDGEN_10_Val,crt_int,0.029931685,0.509033307
|
||||||
|
ehiDGEN_5_mean,aot_total,-0.068430961,0.130753688
|
||||||
|
ehiDGEN_5_mean,crt_correct,0.047639811,0.293085717
|
||||||
|
ehiDGEN_5_mean,crt_int,-0.042472828,0.348637399
|
||||||
|
ehiDGEN_5_Pers,crt_correct,0.061428736,0.175035746
|
||||||
|
ehiDGEN_5_Pers,crt_int,-0.038746148,0.392587023
|
||||||
|
ehiDGEN_5_Pref,crt_int,-0.042178047,0.351995885
|
||||||
|
ehiDGEN_5_Pref,aot_total,-0.038691604,0.393254174
|
||||||
|
ehiDGEN_5_Pref,crt_correct,0.033401162,0.461166903
|
||||||
|
ehiDGEN_5_Val,aot_total,-0.080834838,0.074119126
|
||||||
|
ehiDGEN_5_Val,crt_int,-0.024116805,0.594715088
|
||||||
|
ehiDGEN_5_Val,crt_correct,0.02187202,0.629462582
|
||||||
|
,,,
|
||||||
|
14
eohi2/correlation_matrix.csv
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
,ehiDGEN_5_mean,ehiDGEN_10_mean,ehi5_global_mean,ehi10_global_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
|
||||||
|
ehiDGEN_10_mean,0.42***,,,,,,,,
|
||||||
|
ehi5_global_mean,0.21***,0.21***,,,,,,,
|
||||||
|
ehi10_global_mean,0.19***,0.37***,0.43***,,,,,,
|
||||||
|
sex_dummy,0.074,0.053,0.023,0.051,,,,,
|
||||||
|
demo_age_1,-0.053,-0.22***,-0.21***,-0.26***,-0.028,,,,
|
||||||
|
edu_num,-0.031,-0.037,0.083,-0.073,0.0094,-0.078,,,
|
||||||
|
aot_total,-0.064,-0.0067,-0.0032,0.050,-0.072,-0.20***,0.10*,,
|
||||||
|
crt_correct,0.045,0.053,0.093*,0.090*,-0.16***,-0.0044,0.12**,-0.037,
|
||||||
|
crt_int,-0.041,-0.065,-0.11*,-0.083,0.14*,0.028,-0.11*,-0.011,-0.88***
|
||||||
|
,,,,,,,,,
|
||||||
|
*p<0.05,,,,,,,,,
|
||||||
|
**p<0.01,,,,,,,,,
|
||||||
|
***p<0.001,,,,,,,,,
|
||||||
|
BIN
eohi2/correlation_plot_domain_general_vars_spearman.pdf
Normal file
BIN
eohi2/correlation_plot_domain_vars_pearson.pdf
Normal file
BIN
eohi2/correlation_plot_domain_vars_spearman.pdf
Normal file
BIN
eohi2/correlation_plot_scales_spearman.pdf
Normal file
10
eohi2/correlation_pvalues.csv
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
,ehiDGEN_5_mean,ehiDGEN_10_mean,ehi5_global_mean,ehi10_global_mean,sex_dummy,demo_age_1,edu_num,aot_total,crt_correct
|
||||||
|
ehiDGEN_10_mean,2.9929E-22,,,,,,,,
|
||||||
|
ehi5_global_mean,4.14491E-06,3.02E-06,,,,,,,
|
||||||
|
ehi10_global_mean,1.61573E-05,1.66719E-17,1.63546E-23,,,,,,
|
||||||
|
sex_dummy,0.104170333,0.245085093,0.606827914,0.260991884,,,,,
|
||||||
|
demo_age_1,0.239362544,1.57395E-06,4.57184E-06,5.31327E-09,0.536013997,,,,
|
||||||
|
edu_num,0.489059792,0.41328311,0.067372333,0.10843647,0.835590798,0.086262901,,,
|
||||||
|
aot_total,0.156912095,0.882372703,0.943759329,0.269428383,0.112852544,5.87906E-06,0.025659604,,
|
||||||
|
crt_correct,0.319158511,0.24026811,0.039792024,0.047593264,0.000483116,0.923101803,0.00884239,0.4122211,
|
||||||
|
crt_int,0.365601388,0.150705909,0.01133869,0.065890723,0.001816488,0.533287266,0.012949289,0.810348767,9.5293E-160
|
||||||
|
34
eohi2/correlations - domain general vars.csv
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
Variable1,Variable2,Spearman_r,P_value
|
||||||
|
DGEN_10_global_mean,aot_total,0.393039545,0
|
||||||
|
DGEN_5_global_mean,aot_total,0.401732294,0
|
||||||
|
DGEN_fut_10_mean,aot_total,0.372705919,0
|
||||||
|
DGEN_fut_5_mean,aot_total,0.400202502,0
|
||||||
|
DGEN_past_10_mean,aot_total,0.368847521,0
|
||||||
|
DGEN_past_5_mean,aot_total,0.376820454,0
|
||||||
|
DGENfut_global_mean,aot_total,0.382481063,0
|
||||||
|
DGENpast_global_mean,aot_total,0.369258554,0
|
||||||
|
DGEN_fut_5_mean,crt_correct,-0.08823341,0.051182461
|
||||||
|
DGENfut_global_mean,crt_correct,-0.086607229,0.05563658
|
||||||
|
DGEN_5_global_mean,crt_correct,-0.079087177,0.080612427
|
||||||
|
DGEN_fut_10_mean,crt_correct,-0.073786402,0.103162718
|
||||||
|
DGEN_fut_5.10_mean,aot_total,0.06505662,0.150871574
|
||||||
|
DGEN_10_global_mean,crt_correct,-0.060769258,0.17972166
|
||||||
|
DGEN_5.10_global_mean,aot_total,0.058102985,0.199615824
|
||||||
|
DGEN_past_5_mean,crt_correct,-0.057080059,0.207659218
|
||||||
|
DGEN_fut_5.10_mean,crt_correct,-0.052644919,0.245242874
|
||||||
|
DGEN_past_5.10_mean,aot_total,0.050343754,0.266514845
|
||||||
|
DGEN_fut_5.10_mean,crt_int,0.049423543,0.275365524
|
||||||
|
DGENpast_global_mean,crt_correct,-0.048081298,0.288630365
|
||||||
|
DGEN_past_10_mean,crt_int,-0.042776663,0.345197086
|
||||||
|
DGEN_5.10_global_mean,crt_correct,-0.036910124,0.415418734
|
||||||
|
DGENpast_global_mean,crt_int,-0.035697633,0.430916171
|
||||||
|
DGEN_past_10_mean,crt_correct,-0.035562484,0.432664043
|
||||||
|
DGEN_past_5_mean,crt_int,-0.031740856,0.483754572
|
||||||
|
DGEN_10_global_mean,crt_int,-0.019205703,0.671817818
|
||||||
|
DGEN_past_5.10_mean,crt_int,-0.015826279,0.727015617
|
||||||
|
DGEN_5_global_mean,crt_int,-0.014881366,0.742720553
|
||||||
|
DGEN_past_5.10_mean,crt_correct,-0.013651301,0.763324994
|
||||||
|
DGEN_5.10_global_mean,crt_int,0.011869586,0.793465063
|
||||||
|
DGENfut_global_mean,crt_int,0.009764448,0.829473295
|
||||||
|
DGEN_fut_10_mean,crt_int,0.002368134,0.95834271
|
||||||
|
DGEN_fut_5_mean,crt_int,-0.0005323,0.990632398
|
||||||
|
175
eohi2/correlations - domain general vars.r
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(corrplot)
|
||||||
|
library(Hmisc)
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp2_data <- read.csv("eohi2.csv")
|
||||||
|
|
||||||
|
# Define the two sets of variables
|
||||||
|
set1_vars <- c("DGEN_past_5_mean", "DGEN_past_10_mean", "DGEN_fut_5_mean", "DGEN_fut_10_mean",
|
||||||
|
"DGEN_past_5.10_mean", "DGEN_fut_5.10_mean", "DGENpast_global_mean",
|
||||||
|
"DGENfut_global_mean", "DGEN_5_global_mean", "DGEN_10_global_mean", "DGEN_5.10_global_mean")
|
||||||
|
set2_vars <- c("aot_total", "crt_correct", "crt_int")
|
||||||
|
|
||||||
|
# Create subset with only the variables of interest
|
||||||
|
correlation_data <- exp2_data[, c(set1_vars, set2_vars)]
|
||||||
|
|
||||||
|
# ===== NORMALITY CHECKS =====
|
||||||
|
# Shapiro-Wilk tests for normality (n < 5000)
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
if(length(na.omit(correlation_data[[var]])) <= 5000) {
|
||||||
|
shapiro_result <- shapiro.test(correlation_data[[var]])
|
||||||
|
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
|
||||||
|
var, shapiro_result$p.value,
|
||||||
|
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# Visual normality checks
|
||||||
|
pdf("normality_plots_domain_general_vars.pdf", width = 12, height = 8)
|
||||||
|
par(mfrow = c(2, 4))
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
# Histogram with normal curve overlay
|
||||||
|
hist(correlation_data[[var]], main = paste("Histogram:", var),
|
||||||
|
xlab = var, freq = FALSE)
|
||||||
|
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE)),
|
||||||
|
add = TRUE, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Q-Q plot
|
||||||
|
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
|
||||||
|
qqline(correlation_data[[var]], col = "red", lwd = 2)
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# ===== LINEARITY CHECKS =====
|
||||||
|
# Check linearity between variable pairs
|
||||||
|
pdf("linearity_plots_domain_general_vars.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(4, 3))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
# Scatter plot with regression line
|
||||||
|
plot(correlation_data[[var1]], correlation_data[[var2]],
|
||||||
|
main = paste(var1, "vs", var2),
|
||||||
|
xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black")
|
||||||
|
|
||||||
|
# Add linear regression line
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
abline(lm_fit, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add LOESS smooth line for non-linear pattern detection
|
||||||
|
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
|
||||||
|
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
|
||||||
|
loess_pred <- predict(loess_fit, x_seq)
|
||||||
|
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
|
||||||
|
|
||||||
|
# Calculate R-squared for linear fit
|
||||||
|
r_squared <- summary(lm_fit)$r.squared
|
||||||
|
cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Residual analysis for linearity
|
||||||
|
pdf("residual_plots_domain_general_vars.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(4, 3))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
residuals <- residuals(lm_fit)
|
||||||
|
fitted <- fitted(lm_fit)
|
||||||
|
|
||||||
|
plot(fitted, residuals,
|
||||||
|
main = paste("Residuals:", var1, "vs", var2),
|
||||||
|
xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black")
|
||||||
|
abline(h = 0, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add smooth line to residuals
|
||||||
|
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate Spearman correlation matrix only
|
||||||
|
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
|
||||||
|
|
||||||
|
# Print correlation matrix with 5 decimal places
|
||||||
|
print(round(cor_matrix_spearman, 5))
|
||||||
|
|
||||||
|
# Separate correlations between the two sets (Spearman)
|
||||||
|
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
|
||||||
|
print(round(set1_set2_cor, 5))
|
||||||
|
|
||||||
|
# Calculate correlations within each set (Spearman)
|
||||||
|
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
|
||||||
|
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
|
||||||
|
|
||||||
|
# Statistical significance tests (Spearman)
|
||||||
|
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
|
||||||
|
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create correlation plot for Spearman only
|
||||||
|
pdf("correlation_plot_domain_general_vars_spearman.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_spearman, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Spearman Correlation Matrix: Domain-General Vars vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Summary statistics
|
||||||
|
desc_stats <- describe(correlation_data)
|
||||||
|
print(round(desc_stats, 5))
|
||||||
|
|
||||||
|
# Save results to CSV files
|
||||||
|
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_general_vars.csv")
|
||||||
|
write.csv(round(desc_stats, 5), "descriptive_statistics_domain_general_vars.csv")
|
||||||
|
|
||||||
|
# Save correlation results in a formatted table
|
||||||
|
cor_results <- data.frame(
|
||||||
|
Variable1 = character(),
|
||||||
|
Variable2 = character(),
|
||||||
|
Spearman_r = numeric(),
|
||||||
|
P_value = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract correlations between sets
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
r_val <- cor_matrix_spearman[var1, var2]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
|
||||||
|
cor_results <- rbind(cor_results, data.frame(
|
||||||
|
Variable1 = var1,
|
||||||
|
Variable2 = var2,
|
||||||
|
Spearman_r = r_val,
|
||||||
|
P_value = p_val,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write.csv(cor_results, "correlations - domain general vars.csv", row.names = FALSE)
|
||||||
34
eohi2/correlations - domain specific vars.csv
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
Variable1,Variable2,Spearman_r,P_value
|
||||||
|
N10_global_mean,aot_total,0.150441363,0.000846088
|
||||||
|
N5_global_mean,aot_total,0.185910923,3.52323E-05
|
||||||
|
NFut_10_mean,aot_total,0.155140023,0.000575829
|
||||||
|
NFut_5_mean,aot_total,0.199536342,8.75208E-06
|
||||||
|
NFut_global_mean,aot_total,0.185343328,3.72583E-05
|
||||||
|
NPast_10_mean,aot_total,0.143227088,0.001495728
|
||||||
|
NPast_5_mean,aot_total,0.150454018,0.000845224
|
||||||
|
NPast_global_mean,aot_total,0.148341676,0.001001318
|
||||||
|
X5.10_global_mean,aot_total,0.206551087,4.11273E-06
|
||||||
|
X5.10fut_mean,aot_total,0.208127536,3.45821E-06
|
||||||
|
X5.10past_mean,aot_total,0.168898536,0.000175189
|
||||||
|
NFut_5_mean,crt_correct,-0.055312717,0.222103555
|
||||||
|
NPast_5_mean,crt_int,-0.053658543,0.236260102
|
||||||
|
NPast_global_mean,crt_int,-0.043976695,0.33182097
|
||||||
|
NPast_10_mean,crt_int,-0.043338736,0.33888983
|
||||||
|
NFut_global_mean,crt_correct,-0.036977473,0.414567649
|
||||||
|
X5.10fut_mean,crt_int,-0.030131358,0.506209892
|
||||||
|
NPast_5_mean,crt_correct,0.028880988,0.524024755
|
||||||
|
NFut_5_mean,crt_int,0.027093379,0.550039031
|
||||||
|
X5.10_global_mean,crt_int,-0.025449064,0.574514939
|
||||||
|
NPast_10_mean,crt_correct,0.021023332,0.642821387
|
||||||
|
NPast_global_mean,crt_correct,0.020816029,0.646102235
|
||||||
|
NFut_10_mean,crt_correct,-0.020486277,0.651335162
|
||||||
|
X5.10past_mean,crt_int,-0.018645297,0.680859581
|
||||||
|
N5_global_mean,crt_correct,-0.018075161,0.690105495
|
||||||
|
N10_global_mean,crt_int,-0.016544862,0.71514808
|
||||||
|
NFut_10_mean,crt_int,-0.009395086,0.835830518
|
||||||
|
NFut_global_mean,crt_int,0.008419817,0.852666611
|
||||||
|
N10_global_mean,crt_correct,-0.008133995,0.857613839
|
||||||
|
N5_global_mean,crt_int,-0.007330571,0.87154938
|
||||||
|
X5.10fut_mean,crt_correct,-0.005165193,0.909294754
|
||||||
|
X5.10_global_mean,crt_correct,-0.004538036,0.920269031
|
||||||
|
X5.10past_mean,crt_correct,0.003021813,0.94685912
|
||||||
|
197
eohi2/correlations - domain specific vars.r
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(corrplot)
|
||||||
|
library(Hmisc)
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp2_data <- read.csv("eohi2.csv")
|
||||||
|
|
||||||
|
# Define the two sets of variables
|
||||||
|
set1_vars <- c("NPast_5_mean", "NPast_10_mean", "NFut_5_mean", "NFut_10_mean",
|
||||||
|
"X5.10past_mean", "X5.10fut_mean", "NPast_global_mean",
|
||||||
|
"NFut_global_mean", "X5.10_global_mean", "N5_global_mean", "N10_global_mean")
|
||||||
|
set2_vars <- c("aot_total", "crt_correct", "crt_int")
|
||||||
|
|
||||||
|
# Create subset with only the variables of interest
|
||||||
|
correlation_data <- exp2_data[, c(set1_vars, set2_vars)]
|
||||||
|
|
||||||
|
# ===== NORMALITY CHECKS =====
|
||||||
|
# Shapiro-Wilk tests for normality (n < 5000)
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
if(length(na.omit(correlation_data[[var]])) <= 5000) {
|
||||||
|
shapiro_result <- shapiro.test(correlation_data[[var]])
|
||||||
|
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
|
||||||
|
var, shapiro_result$p.value,
|
||||||
|
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Kolmogorov-Smirnov test for normality
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
ks_result <- ks.test(correlation_data[[var]], "pnorm",
|
||||||
|
mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE))
|
||||||
|
cat(sprintf("%s: KS p = %.5f %s\n",
|
||||||
|
var, ks_result$p.value,
|
||||||
|
ifelse(ks_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Visual normality checks
|
||||||
|
pdf("normality_plots_domain_vars.pdf", width = 12, height = 8)
|
||||||
|
par(mfrow = c(2, 4))
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
# Histogram with normal curve overlay
|
||||||
|
hist(correlation_data[[var]], main = paste("Histogram:", var),
|
||||||
|
xlab = var, freq = FALSE)
|
||||||
|
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE)),
|
||||||
|
add = TRUE, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Q-Q plot
|
||||||
|
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
|
||||||
|
qqline(correlation_data[[var]], col = "red", lwd = 2)
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# ===== LINEARITY CHECKS =====
|
||||||
|
# Check linearity between variable pairs
|
||||||
|
pdf("linearity_plots_domain_vars.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(4, 3))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
# Scatter plot with regression line
|
||||||
|
plot(correlation_data[[var1]], correlation_data[[var2]],
|
||||||
|
main = paste(var1, "vs", var2),
|
||||||
|
xlab = var1, ylab = var2, pch = 21, cex = 0.6, bg = "lightblue", col = "black")
|
||||||
|
|
||||||
|
# Add linear regression line
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
abline(lm_fit, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add LOESS smooth line for non-linear pattern detection
|
||||||
|
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
|
||||||
|
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
|
||||||
|
loess_pred <- predict(loess_fit, x_seq)
|
||||||
|
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
|
||||||
|
|
||||||
|
# Calculate R-squared for linear fit
|
||||||
|
r_squared <- summary(lm_fit)$r.squared
|
||||||
|
cat(sprintf("%s vs %s: R² = %.5f\n", var1, var2, r_squared))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Residual analysis for linearity
|
||||||
|
pdf("residual_plots_domain_vars.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(4, 3))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
residuals <- residuals(lm_fit)
|
||||||
|
fitted <- fitted(lm_fit)
|
||||||
|
|
||||||
|
plot(fitted, residuals,
|
||||||
|
main = paste("Residuals:", var1, "vs", var2),
|
||||||
|
xlab = "Fitted Values", ylab = "Residuals", pch = 21, cex = 0.6, bg = "lightblue", col = "black")
|
||||||
|
abline(h = 0, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add smooth line to residuals
|
||||||
|
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate correlation matrices (both Pearson and Spearman)
|
||||||
|
cor_matrix_pearson <- cor(correlation_data, method = "pearson")
|
||||||
|
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
|
||||||
|
|
||||||
|
# Use Spearman as primary method
|
||||||
|
cor_matrix <- cor_matrix_spearman
|
||||||
|
|
||||||
|
# Print correlation matrices with 5 decimal places
|
||||||
|
print(round(cor_matrix_spearman, 5))
|
||||||
|
|
||||||
|
# Separate correlations between the two sets (Spearman)
|
||||||
|
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
|
||||||
|
print(round(set1_set2_cor, 5))
|
||||||
|
|
||||||
|
# Calculate correlations within each set (Spearman)
|
||||||
|
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
|
||||||
|
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
|
||||||
|
|
||||||
|
# Statistical significance tests (Spearman)
|
||||||
|
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
|
||||||
|
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create correlation plots for both methods
|
||||||
|
pdf("correlation_plot_domain_vars_spearman.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_spearman, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Spearman Correlation Matrix: Domain-Specific Vars vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
pdf("correlation_plot_domain_vars_pearson.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_pearson, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Pearson Correlation Matrix: Domain-Specific Vars vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Summary statistics
|
||||||
|
desc_stats <- describe(correlation_data)
|
||||||
|
print(round(desc_stats, 5))
|
||||||
|
|
||||||
|
# Save results to CSV files
|
||||||
|
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations_domain_vars.csv")
|
||||||
|
write.csv(round(cor_matrix_pearson, 5), "pearson_correlations_domain_vars.csv")
|
||||||
|
write.csv(round(desc_stats, 5), "descriptive_statistics_domain_vars.csv")
|
||||||
|
|
||||||
|
# Save correlation results in a formatted table
|
||||||
|
cor_results <- data.frame(
|
||||||
|
Variable1 = character(),
|
||||||
|
Variable2 = character(),
|
||||||
|
Spearman_r = numeric(),
|
||||||
|
P_value = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract significant correlations between sets
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
r_val <- cor_matrix_spearman[var1, var2]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
|
||||||
|
cor_results <- rbind(cor_results, data.frame(
|
||||||
|
Variable1 = var1,
|
||||||
|
Variable2 = var2,
|
||||||
|
Spearman_r = r_val,
|
||||||
|
P_value = p_val,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write.csv(cor_results, "correlations - domain specific vars.csv", row.names = FALSE)
|
||||||
|
|
||||||
176
eohi2/correlations CORRECT - ehi + DGEN x scales.r
Normal file
@ -0,0 +1,176 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Load required libraries
|
||||||
|
library(corrplot)
|
||||||
|
library(Hmisc)
|
||||||
|
library(psych)
|
||||||
|
|
||||||
|
# Load the data
|
||||||
|
exp1_data <- read.csv("eohi2.csv")
|
||||||
|
|
||||||
|
# Define the two sets of variables
|
||||||
|
set1_vars <- c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val",
|
||||||
|
"ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val",
|
||||||
|
"ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN",
|
||||||
|
"ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN",
|
||||||
|
"ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN",
|
||||||
|
"ehiDGEN_5_mean", "ehiDGEN_10_mean",
|
||||||
|
"ehi5_global_mean", "ehi10_global_mean", "ehi5.10_global_mean")
|
||||||
|
set2_vars <- c("aot_total", "crt_correct", "crt_int")
|
||||||
|
|
||||||
|
# Create subset with only the variables of interest
|
||||||
|
correlation_data <- exp1_data[, c(set1_vars, set2_vars)]
|
||||||
|
|
||||||
|
# ===== NORMALITY CHECKS =====
|
||||||
|
# Shapiro-Wilk tests for normality
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
shapiro_result <- shapiro.test(correlation_data[[var]])
|
||||||
|
cat(sprintf("%s: Shapiro-Wilk p = %.5f %s\n",
|
||||||
|
var, shapiro_result$p.value,
|
||||||
|
ifelse(shapiro_result$p.value < 0.05, "(NOT normal)", "(normal)")))
|
||||||
|
}
|
||||||
|
|
||||||
|
# Visual normality checks
|
||||||
|
pdf("normality_plots.pdf", width = 12, height = 8)
|
||||||
|
par(mfrow = c(2, 4))
|
||||||
|
for(var in names(correlation_data)) {
|
||||||
|
# Histogram with normal curve overlay
|
||||||
|
hist(correlation_data[[var]], main = paste("Histogram:", var),
|
||||||
|
xlab = var, freq = FALSE)
|
||||||
|
curve(dnorm(x, mean = mean(correlation_data[[var]], na.rm = TRUE),
|
||||||
|
sd = sd(correlation_data[[var]], na.rm = TRUE)),
|
||||||
|
add = TRUE, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Q-Q plot
|
||||||
|
qqnorm(correlation_data[[var]], main = paste("Q-Q Plot:", var))
|
||||||
|
qqline(correlation_data[[var]], col = "red", lwd = 2)
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# ===== LINEARITY CHECKS =====
|
||||||
|
# Check linearity between variable pairs
|
||||||
|
pdf("linearity_plots.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(3, 5))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
# Scatter plot with regression line
|
||||||
|
plot(correlation_data[[var1]], correlation_data[[var2]],
|
||||||
|
main = paste(var1, "vs", var2),
|
||||||
|
xlab = var1, ylab = var2, pch = 16, cex = 0.6)
|
||||||
|
|
||||||
|
# Add linear regression line
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
abline(lm_fit, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add LOESS smooth line for non-linear pattern detection
|
||||||
|
loess_fit <- loess(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
x_seq <- seq(min(correlation_data[[var1]], na.rm = TRUE),
|
||||||
|
max(correlation_data[[var1]], na.rm = TRUE), length = 100)
|
||||||
|
loess_pred <- predict(loess_fit, x_seq)
|
||||||
|
lines(x_seq, loess_pred, col = "blue", lwd = 2, lty = 2)
|
||||||
|
|
||||||
|
# Calculate R-squared for linear fit
|
||||||
|
r_squared <- summary(lm_fit)$r.squared
|
||||||
|
cat(sprintf("%s vs %s: R² = %.4f\n", var1, var2, r_squared))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Residual analysis for linearity
|
||||||
|
pdf("residual_plots.pdf", width = 15, height = 10)
|
||||||
|
par(mfrow = c(3, 5))
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
|
||||||
|
lm_fit <- lm(correlation_data[[var2]] ~ correlation_data[[var1]])
|
||||||
|
residuals <- residuals(lm_fit)
|
||||||
|
fitted <- fitted(lm_fit)
|
||||||
|
|
||||||
|
plot(fitted, residuals,
|
||||||
|
main = paste("Residuals:", var1, "vs", var2),
|
||||||
|
xlab = "Fitted Values", ylab = "Residuals", pch = 16, cex = 0.6)
|
||||||
|
abline(h = 0, col = "red", lwd = 2)
|
||||||
|
|
||||||
|
# Add smooth line to residuals
|
||||||
|
lines(lowess(fitted, residuals), col = "blue", lwd = 2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
# Calculate correlation matrix (Spearman only)
|
||||||
|
cor_matrix_spearman <- cor(correlation_data, method = "spearman")
|
||||||
|
|
||||||
|
# Print correlation matrix with 5 decimal places
|
||||||
|
print(round(cor_matrix_spearman, 5))
|
||||||
|
|
||||||
|
# Separate correlations between the two sets (Spearman)
|
||||||
|
set1_set2_cor <- cor_matrix_spearman[set1_vars, set2_vars]
|
||||||
|
print(round(set1_set2_cor, 5))
|
||||||
|
|
||||||
|
# Calculate correlations within each set (Spearman)
|
||||||
|
set1_within_cor <- cor_matrix_spearman[set1_vars, set1_vars]
|
||||||
|
set2_within_cor <- cor_matrix_spearman[set2_vars, set2_vars]
|
||||||
|
|
||||||
|
# Statistical significance tests (Spearman)
|
||||||
|
cor_test_results_spearman <- rcorr(as.matrix(correlation_data), type = "spearman")
|
||||||
|
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
cat(sprintf("%s vs %s: p = %.5f\n", var1, var2, p_val))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Create correlation plot (Spearman only)
|
||||||
|
pdf("correlation_plot_scales_spearman.pdf", width = 10, height = 8)
|
||||||
|
corrplot(cor_matrix_spearman, method = "color", type = "upper",
|
||||||
|
order = "hclust", tl.cex = 0.8, tl.col = "black",
|
||||||
|
addCoef.col = "black", number.cex = 0.7,
|
||||||
|
title = "Spearman Correlation Matrix: EOHI/DGEN vs Cognitive Measures")
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
# Summary statistics
|
||||||
|
desc_stats <- describe(correlation_data)
|
||||||
|
print(round(desc_stats, 5))
|
||||||
|
|
||||||
|
# Save results to CSV files
|
||||||
|
write.csv(round(cor_matrix_spearman, 5), "spearman_correlations.csv")
|
||||||
|
write.csv(round(desc_stats, 5), "descriptive_statistics.csv")
|
||||||
|
|
||||||
|
# Save correlation results in a formatted table
|
||||||
|
cor_results <- data.frame(
|
||||||
|
Variable1 = character(),
|
||||||
|
Variable2 = character(),
|
||||||
|
Spearman_r = numeric(),
|
||||||
|
P_value = numeric(),
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract significant correlations between sets
|
||||||
|
for(i in 1:length(set1_vars)) {
|
||||||
|
for(j in 1:length(set2_vars)) {
|
||||||
|
var1 <- set1_vars[i]
|
||||||
|
var2 <- set2_vars[j]
|
||||||
|
r_val <- cor_matrix_spearman[var1, var2]
|
||||||
|
p_val <- cor_test_results_spearman$P[var1, var2]
|
||||||
|
|
||||||
|
cor_results <- rbind(cor_results, data.frame(
|
||||||
|
Variable1 = var1,
|
||||||
|
Variable2 = var2,
|
||||||
|
Spearman_r = r_val,
|
||||||
|
P_value = p_val,
|
||||||
|
stringsAsFactors = FALSE
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write.csv(cor_results, "correlationCORRECT_exp2.csv", row.names = FALSE)
|
||||||
266
eohi2/dataP 01 - recode and combine past & future vars.r
Normal file
@ -0,0 +1,266 @@
|
|||||||
|
# Script to combine and recode Likert scale items in eohi2.csv
|
||||||
|
# Combines 01 and 02 versions of items, then recodes text to numeric values
|
||||||
|
|
||||||
|
# Load necessary library
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Read the data (with check.names=FALSE to preserve original column names)
|
||||||
|
# na.strings="" keeps empty cells as empty strings instead of converting to NA
|
||||||
|
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
|
||||||
|
|
||||||
|
# Define the mapping function
|
||||||
|
# Map Likert agreement text to a numeric score in [-3, 3].
#
# x: character vector of responses (any case; surrounding whitespace is
#    ignored, which also handles padded CSV cells).
# Returns a numeric vector; unrecognized, empty, or NA responses become NA.
#
# Implemented as a base-R named-vector lookup: unmatched keys (including NA)
# yield NA_real_, reproducing the original case_when fallback while
# evaluating tolower() once and dropping the dplyr dependency.
recode_likert <- function(x) {
  likert_map <- c(
    "strongly disagree"          = -3,
    "disagree"                   = -2,
    "somewhat disagree"          = -1,
    "neither agree nor disagree" =  0,
    "somewhat agree"             =  1,
    "agree"                      =  2,
    "strongly agree"             =  3
  )
  unname(likert_map[tolower(trimws(x))])
}
|
||||||
|
|
||||||
|
# Define source column pairs (Set A and Set B)
|
||||||
|
source_cols_A <- c(
|
||||||
|
"01past5PrefItem_1", "01past5PrefItem_2", "01past5PrefItem_3", "01past5PrefItem_4", "01past5PrefItem_5",
|
||||||
|
"01past5PersItem_1", "01past5PersItem_2", "01past5PersItem_3", "01past5PersItem_4", "01past5PersItem_5",
|
||||||
|
"01past5ValItem_1", "01past5ValItem_2", "01past5ValItem_3", "01past5ValItem_4", "01past5ValItem_5",
|
||||||
|
"01past10PrefItem_1", "01past10PrefItem_2", "01past10PrefItem_3", "01past10PrefItem_4", "01past10PrefItem_5",
|
||||||
|
"01past10PersItem_1", "01past10PersItem_2", "01past10PersItem_3", "01past10PersItem_4", "01past10PersItem_5",
|
||||||
|
"01past10ValItem_1", "01past10ValItem_2", "01past10ValItem_3", "01past10ValItem_4", "01past10ValItem_5",
|
||||||
|
"01fut5PrefItem_1", "01fut5PrefItem_2", "01fut5PrefItem_3", "01fut5PrefItem_4", "01fut5PrefItem_5",
|
||||||
|
"01fut5PersItem_1", "01fut5PersItem_2", "01fut5PersItem_3", "01fut5PersItem_4", "01fut5PersItem_5",
|
||||||
|
"01fut5ValItem_1", "01fut5ValItem_2", "01fut5ValItem_3", "01fut5ValItem_4", "01fut5ValItem_5",
|
||||||
|
"01fut10PrefItem_1", "01fut10PrefItem_2", "01fut10PrefItem_3", "01fut10PrefItem_4", "01fut10PrefItem_5",
|
||||||
|
"01fut10PersItem_1", "01fut10PersItem_2", "01fut10PersItem_3", "01fut10PersItem_4", "01fut10PersItem_5",
|
||||||
|
"01fut10ValItem_1", "01fut10ValItem_2", "01fut10ValItem_3", "01fut10ValItem_4", "01fut10ValItem_5"
|
||||||
|
)
|
||||||
|
|
||||||
|
source_cols_B <- c(
|
||||||
|
"02past5PrefItem_1", "02past5PrefItem_2", "02past5PrefItem_3", "02past5PrefItem_4", "02past5PrefItem_5",
|
||||||
|
"02past5PersItem_1", "02past5PersItem_2", "02past5PersItem_3", "02past5PersItem_4", "02past5PersItem_5",
|
||||||
|
"02past5ValItem_1", "02past5ValItem_2", "02past5ValItem_3", "02past5ValItem_4", "02past5ValItem_5",
|
||||||
|
"02past10PrefItem_1", "02past10PrefItem_2", "02past10PrefItem_3", "02past10PrefItem_4", "02past10PrefItem_5",
|
||||||
|
"02past10PersItem_1", "02past10PersItem_2", "02past10PersItem_3", "02past10PersItem_4", "02past10PersItem_5",
|
||||||
|
"02past10ValItem_1", "02past10ValItem_2", "02past10ValItem_3", "02past10ValItem_4", "02past10ValItem_5",
|
||||||
|
"02fut5PrefItem_1", "02fut5PrefItem_2", "02fut5PrefItem_3", "02fut5PrefItem_4", "02fut5PrefItem_5",
|
||||||
|
"02fut5PersItem_1", "02fut5PersItem_2", "02fut5PersItem_3", "02fut5PersItem_4", "02fut5PersItem_5",
|
||||||
|
"02fut5ValItem_1", "02fut5ValItem_2", "02fut5ValItem_3", "02fut5ValItem_4", "02fut5ValItem_5",
|
||||||
|
"02fut10PrefItem_1", "02fut10PrefItem_2", "02fut10PrefItem_3", "02fut10PrefItem_4", "02fut10PrefItem_5",
|
||||||
|
"02fut10PersItem_1", "02fut10PersItem_2", "02fut10PersItem_3", "02fut10PersItem_4", "02fut10PersItem_5",
|
||||||
|
"02fut10ValItem_1", "02fut10ValItem_2", "02fut10ValItem_3", "02fut10ValItem_4", "02fut10ValItem_5"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define target column names
|
||||||
|
target_cols <- c(
|
||||||
|
"past_5_pref_read", "past_5_pref_music", "past_5_pref_TV", "past_5_pref_nap", "past_5_pref_travel",
|
||||||
|
"past_5_pers_extravert", "past_5_pers_critical", "past_5_pers_dependable", "past_5_pers_anxious", "past_5_pers_complex",
|
||||||
|
"past_5_val_obey", "past_5_val_trad", "past_5_val_opinion", "past_5_val_performance", "past_5_val_justice",
|
||||||
|
"past_10_pref_read", "past_10_pref_music", "past_10_pref_TV", "past_10_pref_nap", "past_10_pref_travel",
|
||||||
|
"past_10_pers_extravert", "past_10_pers_critical", "past_10_pers_dependable", "past_10_pers_anxious", "past_10_pers_complex",
|
||||||
|
"past_10_val_obey", "past_10_val_trad", "past_10_val_opinion", "past_10_val_performance", "past_10_val_justice",
|
||||||
|
"fut_5_pref_read", "fut_5_pref_music", "fut_5_pref_TV", "fut_5_pref_nap", "fut_5_pref_travel",
|
||||||
|
"fut_5_pers_extravert", "fut_5_pers_critical", "fut_5_pers_dependable", "fut_5_pers_anxious", "fut_5_pers_complex",
|
||||||
|
"fut_5_val_obey", "fut_5_val_trad", "fut_5_val_opinion", "fut_5_val_performance", "fut_5_val_justice",
|
||||||
|
"fut_10_pref_read", "fut_10_pref_music", "fut_10_pref_TV", "fut_10_pref_nap", "fut_10_pref_travel",
|
||||||
|
"fut_10_pers_extravert", "fut_10_pers_critical", "fut_10_pers_dependable", "fut_10_pers_anxious", "fut_10_pers_complex",
|
||||||
|
"fut_10_val_obey", "fut_10_val_trad", "fut_10_val_opinion", "fut_10_val_performance", "fut_10_val_justice"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
|
||||||
|
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Get actual column names from dataframe (trimmed)
|
||||||
|
df_cols <- trimws(names(df))
|
||||||
|
|
||||||
|
# Print first 30 actual column names for debugging
|
||||||
|
cat("First 30 actual column names in CSV:\n")
|
||||||
|
for (i in 1:min(30, length(df_cols))) {
|
||||||
|
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
# Check Source A columns
|
||||||
|
missing_A <- source_cols_A[!source_cols_A %in% df_cols]
|
||||||
|
existing_A <- source_cols_A[source_cols_A %in% df_cols]
|
||||||
|
|
||||||
|
cat("Source Set A:\n")
|
||||||
|
cat(" Expected: 60 columns\n")
|
||||||
|
cat(" Found:", length(existing_A), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_A), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_A) > 0) {
|
||||||
|
cat("\n Missing columns from Set A:\n")
|
||||||
|
for (col in missing_A) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check Source B columns
|
||||||
|
missing_B <- source_cols_B[!source_cols_B %in% df_cols]
|
||||||
|
existing_B <- source_cols_B[source_cols_B %in% df_cols]
|
||||||
|
|
||||||
|
cat("\nSource Set B:\n")
|
||||||
|
cat(" Expected: 60 columns\n")
|
||||||
|
cat(" Found:", length(existing_B), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_B), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_B) > 0) {
|
||||||
|
cat("\n Missing columns from Set B:\n")
|
||||||
|
for (col in missing_B) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check for columns with similar names (potential typos/spaces)
|
||||||
|
if (length(missing_A) > 0 || length(missing_B) > 0) {
|
||||||
|
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
|
||||||
|
all_missing <- c(missing_A, missing_B)
|
||||||
|
for (miss_col in all_missing) {
|
||||||
|
# Find columns that start with similar pattern
|
||||||
|
pattern <- substr(miss_col, 1, 10)
|
||||||
|
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
|
||||||
|
if (length(similar) > 0) {
|
||||||
|
cat("\nLooking for:", miss_col)
|
||||||
|
cat("\n Similar columns found:\n")
|
||||||
|
for (sim in similar) {
|
||||||
|
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Stop if critical columns are missing
|
||||||
|
if (length(missing_A) > 30 || length(missing_B) > 30) {
|
||||||
|
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# Process each pair of columns
|
||||||
|
# For each item pair: coalesce Set A over Set B, then recode the Likert
# text to a numeric score in the target column.
# seq_along(target_cols) replaces the hard-coded 1:60 so the loop stays
# correct if the column lists ever change length.
for (i in seq_along(target_cols)) {
  col_A <- source_cols_A[i]
  col_B <- source_cols_B[i]
  target_col <- target_cols[i]

  # Get values from columns; a missing source column contributes all-NA
  vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df))
  vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df))

  # Coalesce: take value from vals_A if present (non-NA, non-empty),
  # otherwise fall back to vals_B
  combined <- ifelse(!is.na(vals_A) & vals_A != "", vals_A, vals_B)

  # Recode text to numeric and write the target column
  df[[target_col]] <- recode_likert(combined)

  # Print progress
  cat("Processed:", target_col, "\n")
}
|
||||||
|
|
||||||
|
# ============= VERIFY TARGET COLUMNS WERE CREATED =============
|
||||||
|
cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
|
||||||
|
|
||||||
|
# Get updated column names
|
||||||
|
df_cols_after <- trimws(names(df))
|
||||||
|
|
||||||
|
# Check which target columns exist
|
||||||
|
existing_targets <- target_cols[target_cols %in% df_cols_after]
|
||||||
|
missing_targets <- target_cols[!target_cols %in% df_cols_after]
|
||||||
|
|
||||||
|
cat("Target Columns:\n")
|
||||||
|
cat(" Expected: 60 columns\n")
|
||||||
|
cat(" Created:", length(existing_targets), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_targets), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_targets) > 0) {
|
||||||
|
cat("\n WARNING: The following target columns were NOT created:\n")
|
||||||
|
for (col in missing_targets) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
stop("\nERROR: Not all target columns were created successfully!")
|
||||||
|
} else {
|
||||||
|
cat("\n SUCCESS: All 60 target columns created successfully!\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END VERIFICATION ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: for one randomly sampled respondent, print every source
# pair (Set A / Set B), which set supplied the value, the raw Likert text,
# and the numeric code written to the target column. Reads `df`,
# `source_cols_A`, `source_cols_B`, and `target_cols` from the script
# environment; purely diagnostic — modifies nothing. Call again to sample
# another row.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # Check each of the 60 pairs
  for (i in 1:60) {
    col_A <- source_cols_A[i]
    col_B <- source_cols_B[i]
    target_col <- target_cols[i]

    # Get values; a missing source column is treated as an empty response
    val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
    val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
    target_val <- df[random_row, target_col]

    # Determine which source had the value — Set A wins, mirroring the
    # coalescing rule used when the target columns were built
    has_val_A <- !is.na(val_A) && val_A != ""
    has_val_B <- !is.na(val_B) && val_B != ""

    if (has_val_A) {
      source_used <- "A"
      original_text <- val_A
    } else if (has_val_B) {
      source_used <- "B"
      original_text <- val_B
    } else {
      source_used <- "NONE"
      original_text <- "(empty)"
    }

    # Print the info
    cat(sprintf("Pair %2d:\n", i))
    cat(sprintf(" Source A: %-30s\n", col_A))
    cat(sprintf(" Source B: %-30s\n", col_B))
    cat(sprintf(" Target: %-30s\n", target_col))
    cat(sprintf(" Value found in: Source %s\n", source_used))
    cat(sprintf(" Original text: '%s'\n", original_text))
    cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on first random row
|
||||||
|
cat("\n\n")
|
||||||
|
qa_check_random_row()
|
||||||
|
|
||||||
|
# Instructions for running additional checks
|
||||||
|
cat("\n")
|
||||||
|
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
|
||||||
|
cat("Run this command in R console:\n")
|
||||||
|
cat(" qa_check_random_row()\n")
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
|
||||||
|
# Save the modified dataframe back to CSV
|
||||||
|
# na="" writes NA values as empty cells instead of "NA" text
|
||||||
|
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
|
||||||
|
|
||||||
|
cat("\nProcessing complete! 60 new columns added to eohi2.csv\n")
|
||||||
|
|
||||||
192
eohi2/dataP 02 - recode present VARS.r
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
# Script to recode present-time Likert scale items in eohi2.csv
|
||||||
|
# Recodes prePrefItem, prePersItem, and preValItem to numeric values
|
||||||
|
|
||||||
|
# Load necessary library
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Read the data (with check.names=FALSE to preserve original column names)
|
||||||
|
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
|
||||||
|
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
|
||||||
|
|
||||||
|
# Define the mapping function
|
||||||
|
# Map Likert agreement text to a numeric score in [-3, 3].
#
# x: character vector of responses (any case; surrounding whitespace is
#    ignored, which also handles padded CSV cells).
# Returns a numeric vector; unrecognized, empty, or NA responses become NA.
#
# Implemented as a base-R named-vector lookup: unmatched keys (including NA)
# yield NA_real_, reproducing the original case_when fallback while
# evaluating tolower() once and dropping the dplyr dependency.
recode_likert <- function(x) {
  likert_map <- c(
    "strongly disagree"          = -3,
    "disagree"                   = -2,
    "somewhat disagree"          = -1,
    "neither agree nor disagree" =  0,
    "somewhat agree"             =  1,
    "agree"                      =  2,
    "strongly agree"             =  3
  )
  unname(likert_map[tolower(trimws(x))])
}
|
||||||
|
|
||||||
|
# Define source columns (15 columns total)
|
||||||
|
source_cols <- c(
|
||||||
|
"prePrefItem_1", "prePrefItem_2", "prePrefItem_3", "prePrefItem_4", "prePrefItem_5",
|
||||||
|
"prePersItem_1", "prePersItem_2", "prePersItem_3", "prePersItem_4", "prePersItem_5",
|
||||||
|
"preValItem_1", "preValItem_2", "preValItem_3", "preValItem_4", "preValItem_5"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define target column names (15 columns total)
|
||||||
|
target_cols <- c(
|
||||||
|
"present_pref_read", "present_pref_music", "present_pref_tv", "present_pref_nap", "present_pref_travel",
|
||||||
|
"present_pers_extravert", "present_pers_critical", "present_pers_dependable", "present_pers_anxious", "present_pers_complex",
|
||||||
|
"present_val_obey", "present_val_trad", "present_val_opinion", "present_val_performance", "present_val_justice"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
|
||||||
|
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Get actual column names from dataframe (trimmed)
|
||||||
|
df_cols <- trimws(names(df))
|
||||||
|
|
||||||
|
# Print first 30 actual column names for debugging
|
||||||
|
cat("First 30 actual column names in CSV:\n")
|
||||||
|
for (i in 1:min(30, length(df_cols))) {
|
||||||
|
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
# Check Source columns
|
||||||
|
missing_source <- source_cols[!source_cols %in% df_cols]
|
||||||
|
existing_source <- source_cols[source_cols %in% df_cols]
|
||||||
|
|
||||||
|
cat("Source Columns:\n")
|
||||||
|
cat(" Expected: 15 columns\n")
|
||||||
|
cat(" Found:", length(existing_source), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_source), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_source) > 0) {
|
||||||
|
cat("\n Missing columns:\n")
|
||||||
|
for (col in missing_source) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check for columns with similar names (potential typos/spaces)
|
||||||
|
if (length(missing_source) > 0) {
|
||||||
|
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
|
||||||
|
for (miss_col in missing_source) {
|
||||||
|
# Find columns that start with similar pattern
|
||||||
|
pattern <- substr(miss_col, 1, 10)
|
||||||
|
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
|
||||||
|
if (length(similar) > 0) {
|
||||||
|
cat("\nLooking for:", miss_col)
|
||||||
|
cat("\n Similar columns found:\n")
|
||||||
|
for (sim in similar) {
|
||||||
|
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Stop if critical columns are missing
|
||||||
|
if (length(missing_source) > 7) {
|
||||||
|
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# Check if target columns exist in the dataframe
|
||||||
|
cat("\n=== CHECKING TARGET COLUMNS ===\n")
|
||||||
|
existing_targets <- target_cols[target_cols %in% df_cols]
|
||||||
|
missing_targets <- target_cols[!target_cols %in% df_cols]
|
||||||
|
|
||||||
|
cat("Target Columns:\n")
|
||||||
|
cat(" Expected: 15 columns\n")
|
||||||
|
cat(" Found:", length(existing_targets), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_targets), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_targets) > 0) {
|
||||||
|
cat("\n Target columns do NOT exist yet - will create them.\n")
|
||||||
|
if (length(existing_targets) > 0) {
|
||||||
|
cat(" WARNING: Some target columns already exist and will be overwritten.\n")
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
cat(" All target columns exist - will overwrite with recoded values.\n")
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
# Process each column (overwrite existing target columns with recoded values)
|
||||||
|
# Recode each present-time source column into its numeric target column
# (existing target columns are overwritten).
# seq_along(target_cols) replaces the hard-coded 1:15 so the loop stays
# correct if the column lists ever change length.
for (i in seq_along(target_cols)) {
  source_col <- source_cols[i]
  target_col <- target_cols[i]

  # Get values from source column; a missing column contributes all-NA
  source_vals <- if (source_col %in% names(df)) df[[source_col]] else rep(NA, nrow(df))

  # Recode to numeric and overwrite/create the target column
  df[[target_col]] <- recode_likert(source_vals)

  # Print progress
  cat("Processed:", target_col, "\n")
}
|
||||||
|
|
||||||
|
cat("\n=== RECODING COMPLETE ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: for one randomly sampled respondent, print each source
# column's raw Likert text alongside the numeric code written to its target
# column. Reads `df`, `source_cols`, and `target_cols` from the script
# environment; purely diagnostic — modifies nothing. Call again to sample
# another row.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # Check each of the 15 columns
  for (i in 1:15) {
    source_col <- source_cols[i]
    target_col <- target_cols[i]

    # Get values; a missing source column is treated as an empty response
    source_val <- if (source_col %in% names(df)) df[random_row, source_col] else ""
    target_val <- df[random_row, target_col]

    # Determine if source has a value (non-NA and non-empty)
    has_val <- !is.na(source_val) && source_val != ""

    original_text <- if (has_val) source_val else "(empty)"

    # Print the info
    cat(sprintf("Column %2d:\n", i))
    cat(sprintf(" Source: %-30s\n", source_col))
    cat(sprintf(" Target: %-30s\n", target_col))
    cat(sprintf(" Original text: '%s'\n", original_text))
    cat(sprintf(" Numeric value: %s\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on first random row
|
||||||
|
cat("\n\n")
|
||||||
|
qa_check_random_row()
|
||||||
|
|
||||||
|
# Instructions for running additional checks
|
||||||
|
cat("\n")
|
||||||
|
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
|
||||||
|
cat("Run this command in R console:\n")
|
||||||
|
cat(" qa_check_random_row()\n")
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
|
||||||
|
# Save the modified dataframe back to CSV.
# na="" writes NA values as empty cells instead of "NA" text.
# NOTE(review): the previous messages claimed the write was commented out
# ("uncomment line 189 to save"), but write.csv was active — the messages
# contradicted the actual behavior. Comment out the next line if you want a
# dry run.
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\nProcessing complete! 15 recoded columns saved to eohi2.csv\n")
|
||||||
253
eohi2/dataP 03 - recode DGEN vars.r
Normal file
@ -0,0 +1,253 @@
|
|||||||
|
# Script to combine DGEN variables in eohi2.csv
|
||||||
|
# Combines 01 and 02 versions of DGEN items (no recoding, just copying values)
|
||||||
|
|
||||||
|
# Load necessary library
|
||||||
|
library(dplyr)
|
||||||
|
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Read the data (with check.names=FALSE to preserve original column names)
|
||||||
|
# na.strings=NULL keeps empty cells as empty strings instead of converting to NA
|
||||||
|
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)
|
||||||
|
|
||||||
|
# Define source column pairs (Set A and Set B)
|
||||||
|
# NOTE: fut5/fut10 columns use _8 suffix and "Values" spelling based on CSV header
|
||||||
|
source_cols_A <- c(
|
||||||
|
"01past5PrefDGEN_1",
|
||||||
|
"01past5PersDGEN_1",
|
||||||
|
"01past5ValDGEN_1",
|
||||||
|
"01past10PrefDGEN_1",
|
||||||
|
"01past10PersDGEN_1",
|
||||||
|
"01past10ValDGEN_1",
|
||||||
|
"01fut5PrefDGEN_8",
|
||||||
|
"01fut5PersDGEN_8",
|
||||||
|
"01fut5ValuesDGEN_1",
|
||||||
|
"01fut10PrefDGEN_8",
|
||||||
|
"01fut10PersDGEN_8",
|
||||||
|
"01fut10ValuesDGEN_1"
|
||||||
|
)
|
||||||
|
|
||||||
|
source_cols_B <- c(
|
||||||
|
"02past5PrefDGEN_1",
|
||||||
|
"02past5PersDGEN_1",
|
||||||
|
"02past5ValDGEN_1",
|
||||||
|
"02past10PrefDGEN_1",
|
||||||
|
"02past10PersDGEN_1",
|
||||||
|
"02past10ValDGEN_1",
|
||||||
|
"02fut5PrefDGEN_8",
|
||||||
|
"02fut5PersDGEN_8",
|
||||||
|
"02fut5ValDGEN_1",
|
||||||
|
"02fut10PrefDGEN_8",
|
||||||
|
"02fut10PersDGEN_8",
|
||||||
|
"02fut10ValDGEN_1"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Define target column names
|
||||||
|
target_cols <- c(
|
||||||
|
"DGEN_past_5_Pref",
|
||||||
|
"DGEN_past_5_Pers",
|
||||||
|
"DGEN_past_5_Val",
|
||||||
|
"DGEN_past_10_Pref",
|
||||||
|
"DGEN_past_10_Pers",
|
||||||
|
"DGEN_past_10_Val",
|
||||||
|
"DGEN_fut_5_Pref",
|
||||||
|
"DGEN_fut_5_Pers",
|
||||||
|
"DGEN_fut_5_Val",
|
||||||
|
"DGEN_fut_10_Pref",
|
||||||
|
"DGEN_fut_10_Pers",
|
||||||
|
"DGEN_fut_10_Val"
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
|
||||||
|
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Get actual column names from dataframe (trimmed)
|
||||||
|
df_cols <- trimws(names(df))
|
||||||
|
|
||||||
|
# Print first 30 actual column names for debugging
|
||||||
|
cat("First 30 actual column names in CSV:\n")
|
||||||
|
for (i in 1:min(30, length(df_cols))) {
|
||||||
|
cat(sprintf(" %2d. '%s' (length: %d)\n", i, df_cols[i], nchar(df_cols[i])))
|
||||||
|
}
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
# Check Source A columns
|
||||||
|
missing_A <- source_cols_A[!source_cols_A %in% df_cols]
|
||||||
|
existing_A <- source_cols_A[source_cols_A %in% df_cols]
|
||||||
|
|
||||||
|
cat("Source Set A:\n")
|
||||||
|
cat(" Expected: 12 columns\n")
|
||||||
|
cat(" Found:", length(existing_A), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_A), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_A) > 0) {
|
||||||
|
cat("\n Missing columns from Set A:\n")
|
||||||
|
for (col in missing_A) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check Source B columns
|
||||||
|
missing_B <- source_cols_B[!source_cols_B %in% df_cols]
|
||||||
|
existing_B <- source_cols_B[source_cols_B %in% df_cols]
|
||||||
|
|
||||||
|
cat("\nSource Set B:\n")
|
||||||
|
cat(" Expected: 12 columns\n")
|
||||||
|
cat(" Found:", length(existing_B), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_B), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_B) > 0) {
|
||||||
|
cat("\n Missing columns from Set B:\n")
|
||||||
|
for (col in missing_B) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Check for columns with similar names (potential typos/spaces)
|
||||||
|
if (length(missing_A) > 0 || length(missing_B) > 0) {
|
||||||
|
cat("\n\n=== CHECKING FOR SIMILAR COLUMN NAMES ===\n")
|
||||||
|
all_missing <- c(missing_A, missing_B)
|
||||||
|
for (miss_col in all_missing) {
|
||||||
|
# Find columns that start with similar pattern
|
||||||
|
pattern <- substr(miss_col, 1, 10)
|
||||||
|
similar <- grep(pattern, df_cols, value = TRUE, ignore.case = TRUE)
|
||||||
|
if (length(similar) > 0) {
|
||||||
|
cat("\nLooking for:", miss_col)
|
||||||
|
cat("\n Similar columns found:\n")
|
||||||
|
for (sim in similar) {
|
||||||
|
cat(" - '", sim, "' (length:", nchar(sim), ")\n", sep = "")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END CHECK ===\n\n")
|
||||||
|
|
||||||
|
# Stop if critical columns are missing
|
||||||
|
if (length(missing_A) > 6 || length(missing_B) > 6) {
|
||||||
|
stop("ERROR: Too many columns missing! Please check column names in CSV file.")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# Process each pair of columns (just copy values, no recoding)
|
||||||
|
# For each DGEN pair: coalesce Set A over Set B and copy the value directly
# to the target column (no recoding).
# seq_along(target_cols) replaces the hard-coded 1:12 so the loop stays
# correct if the column lists ever change length.
for (i in seq_along(target_cols)) {
  col_A <- source_cols_A[i]
  col_B <- source_cols_B[i]
  target_col <- target_cols[i]

  # Get values from columns; a missing source column contributes all-NA
  vals_A <- if (col_A %in% names(df)) df[[col_A]] else rep(NA, nrow(df))
  vals_B <- if (col_B %in% names(df)) df[[col_B]] else rep(NA, nrow(df))

  # Coalesce: take value from vals_A if present (non-NA, non-empty),
  # otherwise fall back to vals_B — no recoding, just copy
  combined <- ifelse(!is.na(vals_A) & vals_A != "", vals_A, vals_B)

  # Copy directly to target column
  df[[target_col]] <- combined

  # Print progress
  cat("Processed:", target_col, "\n")
}
|
||||||
|
|
||||||
|
# ============= VERIFY TARGET COLUMNS WERE CREATED =============
|
||||||
|
cat("\n\n=== VERIFYING TARGET COLUMNS ===\n\n")
|
||||||
|
|
||||||
|
# Get updated column names
|
||||||
|
df_cols_after <- trimws(names(df))
|
||||||
|
|
||||||
|
# Check which target columns exist
|
||||||
|
existing_targets <- target_cols[target_cols %in% df_cols_after]
|
||||||
|
missing_targets <- target_cols[!target_cols %in% df_cols_after]
|
||||||
|
|
||||||
|
cat("Target Columns:\n")
|
||||||
|
cat(" Expected: 12 columns\n")
|
||||||
|
cat(" Created:", length(existing_targets), "columns\n")
|
||||||
|
cat(" Missing:", length(missing_targets), "columns\n")
|
||||||
|
|
||||||
|
if (length(missing_targets) > 0) {
|
||||||
|
cat("\n WARNING: The following target columns were NOT created:\n")
|
||||||
|
for (col in missing_targets) {
|
||||||
|
cat(" -", col, "\n")
|
||||||
|
}
|
||||||
|
stop("\nERROR: Not all target columns were created successfully!")
|
||||||
|
} else {
|
||||||
|
cat("\n SUCCESS: All 12 target columns created successfully!\n")
|
||||||
|
}
|
||||||
|
|
||||||
|
cat("\n=== END VERIFICATION ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: for one randomly sampled respondent, print every DGEN
# source pair (Set A / Set B), which set supplied the value, the original
# value, and what was copied into the target column. Reads `df`,
# `source_cols_A`, `source_cols_B`, and `target_cols` from the script
# environment; purely diagnostic — modifies nothing. Call again to sample
# another row.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # Check each of the 12 pairs
  for (i in 1:12) {
    col_A <- source_cols_A[i]
    col_B <- source_cols_B[i]
    target_col <- target_cols[i]

    # Get values; a missing source column is treated as an empty response
    val_A <- if (col_A %in% names(df)) df[random_row, col_A] else ""
    val_B <- if (col_B %in% names(df)) df[random_row, col_B] else ""
    target_val <- df[random_row, target_col]

    # Determine which source had the value — Set A wins, mirroring the
    # coalescing rule used when the target columns were built
    has_val_A <- !is.na(val_A) && val_A != ""
    has_val_B <- !is.na(val_B) && val_B != ""

    if (has_val_A) {
      source_used <- "A"
      original_value <- val_A
    } else if (has_val_B) {
      source_used <- "B"
      original_value <- val_B
    } else {
      source_used <- "NONE"
      original_value <- "(empty)"
    }

    # Print the info
    cat(sprintf("Pair %2d:\n", i))
    cat(sprintf(" Source A: %-30s\n", col_A))
    cat(sprintf(" Source B: %-30s\n", col_B))
    cat(sprintf(" Target: %-30s\n", target_col))
    cat(sprintf(" Value found in: Source %s\n", source_used))
    cat(sprintf(" Original value: '%s'\n", original_value))
    cat(sprintf(" Target value: '%s'\n", ifelse(is.na(target_val), "NA", as.character(target_val))))
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on first random row
|
||||||
|
cat("\n\n")
|
||||||
|
qa_check_random_row()
|
||||||
|
|
||||||
|
# Instructions for running additional checks
|
||||||
|
cat("\n")
|
||||||
|
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
|
||||||
|
cat("Run this command in R console:\n")
|
||||||
|
cat(" qa_check_random_row()\n")
|
||||||
|
cat("\n")
|
||||||
|
|
||||||
|
|
||||||
|
# Save the modified dataframe back to CSV
|
||||||
|
# na="" writes NA values as empty cells instead of "NA" text
|
||||||
|
write.csv(df, "eohi2.csv", row.names = FALSE, na = "")
|
||||||
|
|
||||||
|
cat("\nProcessing complete! 12 new columns added to eohi2.csv\n")
|
||||||
183
eohi2/dataP 04 - DGEN means.r
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
# Script to calculate DGEN means by time period in eohi2.csv
# Averages the 3 domain scores (Pref, Pers, Val) for each time period

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data. check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of NA.
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# The four time periods and the three domains measured in each
periods <- c("past_5", "past_10", "fut_5", "fut_10")
domains <- c("Pref", "Pers", "Val")

# Source columns (12 total): one column per period x domain,
# ordered period-by-period (Pref, Pers, Val within each period)
source_cols <- unlist(lapply(periods, function(p) paste0("DGEN_", p, "_", domains)))

# Target columns (4 total): one mean column per period
target_cols <- paste0("DGEN_", periods, "_mean")

# Groupings: each target mean is computed from its period's 3 domain columns
source_groups <- setNames(
  lapply(periods, function(p) paste0("DGEN_", p, "_", domains)),
  target_cols
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Actual (trimmed) column names present in the data
df_cols <- trimws(names(df))

# Partition the expected source columns into missing / found
missing_source <- setdiff(source_cols, df_cols)
existing_source <- intersect(source_cols, df_cols)

cat("Source Columns:\n")
cat("  Expected: 12 columns\n")
cat("  Found:", length(existing_source), "columns\n")
cat("  Missing:", length(missing_source), "columns\n")

if (length(missing_source) > 0) {
  cat("\n  Missing source columns:\n")
  cat(paste("    -", missing_source, "\n"), sep = "")
}

# Same partition for the target (mean) columns
missing_targets <- setdiff(target_cols, df_cols)
existing_targets <- intersect(target_cols, df_cols)

cat("\nTarget Columns:\n")
cat("  Expected: 4 columns\n")
cat("  Found:", length(existing_targets), "columns\n")
cat("  Missing:", length(missing_targets), "columns\n")

if (length(missing_targets) > 0) {
  cat("\n  Target columns do NOT exist yet - will create them.\n")
  if (length(existing_targets) > 0) {
    cat("  WARNING: Some target columns already exist and will be overwritten.\n")
  }
} else {
  cat("  All target columns exist - will overwrite with calculated values.\n")
}

cat("\n=== END CHECK ===\n\n")

# Abort when more than half of the 12 expected source columns are absent
if (length(missing_source) > 6) {
  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# ============= CALCULATE MEANS =============
cat("Calculating DGEN means by time period...\n")

# Coerce every source column that actually exists to numeric
num_cols <- intersect(source_cols, names(df))
df[num_cols] <- lapply(df[num_cols], as.numeric)

# Each target column is the row-wise mean of its period's 3 domain columns
for (target in target_cols) {
  grp <- intersect(source_groups[[target]], names(df))
  if (length(grp) == 0) {
    cat("  WARNING: No source columns found for", target, "\n")
    next
  }
  # na.rm = TRUE: a row's mean uses whichever domain scores are present
  df[[target]] <- rowMeans(df[, grp, drop = FALSE], na.rm = TRUE)
  cat("  Processed:", target, "\n")
}

cat("\n=== CALCULATION COMPLETE ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: picks one random row of `df` and, for each of the 4 target
# mean columns, prints the 3 source values, recomputes the mean by hand, and
# reports whether it matches the stored target value (tolerance 0.0001).
# Reads globals: df, target_cols, source_groups. No return value; prints only.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # Check each of the 4 target columns
  for (target in target_cols) {
    source_group <- source_groups[[target]]

    cat(sprintf("Target: %s\n", target))
    cat("  Source columns:\n")

    # Get values from source columns (NA when a column is absent)
    values <- numeric(3)
    for (i in 1:3) {
      col <- source_group[i]
      val <- if (col %in% names(df)) df[random_row, col] else NA
      values[i] <- val
      cat(sprintf("    %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
    }

    # Recompute the mean over the non-missing values, mirroring
    # rowMeans(..., na.rm = TRUE) used in the calculation step
    valid_values <- values[!is.na(values)]
    if (length(valid_values) > 0) {
      expected_mean <- mean(valid_values)
      actual_value <- df[random_row, target]

      cat(sprintf("\n  Calculation:\n"))
      cat(sprintf("    Sum: %s = %.5f\n", paste(valid_values, collapse = " + "), sum(valid_values)))
      cat(sprintf("    Average of %d values: %.5f\n", length(valid_values), expected_mean))
      cat(sprintf("    Target value: %.5f\n", actual_value))
      # Float-safe comparison instead of exact equality
      cat(sprintf("    Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
    } else {
      # All 3 sources missing; the stored target would be NaN from rowMeans
      cat("  No valid values to calculate mean.\n")
    }
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run the QA spot-check once on a randomly chosen row
cat("\n\n")
qa_check_random_row()

# Tell the user how to repeat the check interactively
cat("\n*** TO CHECK ANOTHER RANDOM ROW ***\nRun this command in R console:\n  qa_check_random_row()\n\n")

# Save the modified dataframe back to CSV.
# na = "" writes NA values as empty cells instead of the text "NA".
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\nReview the output above, then uncomment line 163 to save changes.\n\nProcessing complete! 4 DGEN mean columns calculated (not yet saved to file).\n")
|
||||||
298
eohi2/dataP 05 - recode scales VARS.r
Normal file
@ -0,0 +1,298 @@
|
|||||||
|
# Script to compute AOT and CRT scales in eohi2.csv
# AOT: Reverse codes items 4-7, then averages all 8 items
# CRT: Calculates proportion of correct and intuitive responses

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data. check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of NA.
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# Source columns: 8 AOT items and 3 CRT items
aot_cols <- paste0("aot_", 1:8)
crt_cols <- paste0("crt_", 1:3)

# Columns this script creates
target_cols <- c("aot_total", "crt_correct", "crt_int")

# Answer keys for the 3 CRT items (compared case-insensitively later)
crt_correct_answers <- c("5 cents", "5 minutes", "47 days")
crt_intuitive_answers <- c("10 cents", "100 minutes", "24 days")
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Actual (trimmed) column names present in the data
df_cols <- trimws(names(df))

# Print one report section for a group of expected columns and return
# (invisibly) the subset that is absent from the data.
#   header        - section heading, including its own leading/trailing newlines
#   wanted        - character vector of expected column names
#   missing_label - heading shown above the list of absent columns
check_section <- function(header, wanted, missing_label) {
  missing <- wanted[!wanted %in% df_cols]
  existing <- wanted[wanted %in% df_cols]

  cat(header)
  cat("  Expected:", length(wanted), "columns\n")
  cat("  Found:", length(existing), "columns\n")
  cat("  Missing:", length(missing), "columns\n")

  if (length(missing) > 0) {
    cat(paste0("\n  ", missing_label, "\n"))
    cat(paste("    -", missing, "\n"), sep = "")
  }
  invisible(missing)
}

missing_aot <- check_section("AOT Source Columns:\n", aot_cols, "Missing AOT columns:")
missing_crt <- check_section("\nCRT Source Columns:\n", crt_cols, "Missing CRT columns:")
missing_targets <- check_section("\nTarget Columns:\n", target_cols, "Missing target columns:")

cat("\n=== END CHECK ===\n\n")

# Abort when too many columns are absent to produce meaningful scales
if (length(missing_aot) > 4 || length(missing_crt) > 1 || length(missing_targets) > 1) {
  stop("ERROR: Too many columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# ============= PROCESS AOT SCALE =============
cat("Processing AOT scale...\n")

# Convert AOT columns to numeric (coercion warnings flag non-numeric entries)
for (col in aot_cols) {
  if (col %in% names(df)) {
    df[[col]] <- as.numeric(df[[col]])
  }
}

# Calculate the average with reverse coding, WITHOUT modifying original values.
# Items aot_4..aot_7 are reverse coded (negated) for the calculation only.
#
# BUG FIX: the previous version negated POSITIONS 4:7 of the subset of AOT
# columns that happen to exist, so a missing early column (e.g. aot_2) shifted
# the reversal onto the wrong items. Reverse by column NAME instead, which is
# correct regardless of which columns are present.
present_aot <- aot_cols[aot_cols %in% names(df)]
reverse_items <- c("aot_4", "aot_5", "aot_6", "aot_7")
flip <- present_aot %in% reverse_items

df$aot_total <- apply(df[, present_aot, drop = FALSE], 1, function(row) {
  values <- as.numeric(row)
  # Negate only the reverse-coded items (assumes the scale is centered at 0,
  # matching the original script's negation approach)
  values[flip] <- -values[flip]
  # na.rm = TRUE: the mean uses whichever items the respondent answered
  mean(values, na.rm = TRUE)
})

cat("  AOT total scores calculated (items 4-7 reverse coded for calculation only).\n")
cat("  Original AOT item values preserved in dataframe.\n\n")
|
||||||
|
|
||||||
|
# ============= PROCESS CRT SCALES =============
cat("Processing CRT scales...\n")

# Initialize CRT columns
df$crt_correct <- NA
df$crt_int <- NA

# For each respondent, count answered items once and score both keys in the
# same pass (the correct and intuitive denominators are identical by
# construction: both count non-empty responses).
for (i in 1:nrow(df)) {
  n_answered <- 0
  n_correct <- 0
  n_intuitive <- 0

  for (j in 1:3) {
    col <- crt_cols[j]
    if (!(col %in% names(df))) next

    response <- trimws(tolower(as.character(df[i, col])))
    if (is.na(response) || response == "") next

    n_answered <- n_answered + 1
    if (response == tolower(crt_correct_answers[j])) n_correct <- n_correct + 1
    if (response == tolower(crt_intuitive_answers[j])) n_intuitive <- n_intuitive + 1
  }

  # Proportions are only defined when at least one item was answered;
  # otherwise both scores stay NA.
  if (n_answered > 0) {
    df$crt_correct[i] <- n_correct / n_answered
    df$crt_int[i] <- n_intuitive / n_answered
  }
}

cat("  CRT correct and intuitive scores calculated.\n\n")

cat("=== PROCESSING COMPLETE ===\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows
|
||||||
|
|
||||||
|
# QA spot-check: picks one random row of `df`, re-derives aot_total (with
# items 4-7 negated) and the two CRT proportions by hand, and prints whether
# each matches the stored column value (tolerance 0.0001).
# Reads globals: df, aot_cols, crt_cols, crt_correct_answers,
# crt_intuitive_answers. No return value; prints only.
qa_check_random_row <- function() {
  # Pick a random row
  random_row <- sample(1:nrow(df), 1)

  cat("\n========================================\n")
  cat("QA CHECK: Random Row #", random_row, "\n")
  cat("========================================\n\n")

  # AOT Check
  cat("--- AOT SCALE ---\n")
  cat("Source values (original in CSV):\n")
  aot_original <- numeric(8)
  aot_for_calc <- numeric(8)

  for (i in 1:8) {
    col <- aot_cols[i]
    val <- if (col %in% names(df)) df[random_row, col] else NA
    aot_original[i] <- val

    # Apply reversal for items 4-7
    # NOTE(review): this reverses by position in aot_cols, which is correct
    # only while all 8 columns exist; keep in sync with the scoring step.
    if (i %in% 4:7) {
      aot_for_calc[i] <- val * -1
      cat(sprintf("  %s: %s (reversed to %s for calculation)\n",
                  col,
                  ifelse(is.na(val), "NA", as.character(val)),
                  ifelse(is.na(val), "NA", as.character(val * -1))))
    } else {
      aot_for_calc[i] <- val
      cat(sprintf("  %s: %s\n", col, ifelse(is.na(val), "NA", as.character(val))))
    }
  }

  # Manual calculation check: mean over non-missing (reversed) items,
  # mirroring mean(..., na.rm = TRUE) in the scoring step
  valid_aot <- aot_for_calc[!is.na(aot_for_calc)]
  if (length(valid_aot) > 0) {
    expected_mean <- mean(valid_aot)
    actual_value <- df$aot_total[random_row]
    cat(sprintf("\nCalculation check:\n"))
    cat(sprintf("  Sum of reversed values: %s\n", paste(valid_aot, collapse = " + ")))
    cat(sprintf("  Average of %d valid items: %.5f\n", length(valid_aot), expected_mean))
    cat(sprintf("  Target value (aot_total): %.5f\n", actual_value))
    cat(sprintf("  Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
  } else {
    cat("\n  No valid AOT values to calculate.\n")
  }

  # CRT Check: rescore both answer keys for this row
  cat("\n--- CRT SCALE ---\n")
  cat("Source values:\n")
  crt_correct_count <- 0
  crt_int_count <- 0
  crt_n <- 0

  for (i in 1:3) {
    col <- crt_cols[i]
    val <- if (col %in% names(df)) as.character(df[random_row, col]) else ""
    val_trimmed <- trimws(tolower(val))

    correct_ans <- crt_correct_answers[i]
    intuitive_ans <- crt_intuitive_answers[i]

    is_correct <- val_trimmed == tolower(correct_ans)
    is_intuitive <- val_trimmed == tolower(intuitive_ans)

    # NOTE(review): `val_trimmed != ""` is evaluated before the is.na() guard;
    # if val_trimmed were NA this `if` would error. Empty cells arrive as ""
    # because the file is read with na.strings = NULL, so this appears safe —
    # but confirm no genuine NAs can reach here.
    if (val_trimmed != "" && !is.na(val_trimmed)) {
      crt_n <- crt_n + 1
      if (is_correct) crt_correct_count <- crt_correct_count + 1
      if (is_intuitive) crt_int_count <- crt_int_count + 1
    }

    cat(sprintf("  %s: '%s'\n", col, val))
    cat(sprintf("    Correct answer: '%s' -> %s\n", correct_ans, ifelse(is_correct, "CORRECT ✓", "Not correct")))
    cat(sprintf("    Intuitive answer: '%s' -> %s\n", intuitive_ans, ifelse(is_intuitive, "INTUITIVE ✓", "Not intuitive")))
  }

  cat("\nCalculation check:\n")
  if (crt_n > 0) {
    expected_correct <- crt_correct_count / crt_n
    expected_int <- crt_int_count / crt_n
    actual_correct <- df$crt_correct[random_row]
    actual_int <- df$crt_int[random_row]

    cat(sprintf("  Correct: %d out of %d = %.5f\n", crt_correct_count, crt_n, expected_correct))
    cat(sprintf("  Target value (crt_correct): %.5f\n", actual_correct))
    cat(sprintf("  Match: %s\n", ifelse(abs(expected_correct - actual_correct) < 0.0001, "YES ✓", "NO ✗")))

    cat(sprintf("\n  Intuitive: %d out of %d = %.5f\n", crt_int_count, crt_n, expected_int))
    cat(sprintf("  Target value (crt_int): %.5f\n", actual_int))
    cat(sprintf("  Match: %s\n", ifelse(abs(expected_int - actual_int) < 0.0001, "YES ✓", "NO ✗")))
  } else {
    cat("  No valid CRT responses to calculate.\n")
  }

  cat("\n========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on first random row
cat("\n\n")
qa_check_random_row()

# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER RANDOM ROW ***\n")
cat("Run this command in R console:\n")
cat("  qa_check_random_row()\n")
cat("\n")

# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
#
# BUG FIX: the write.csv call below was NOT actually commented out, so the
# script silently overwrote eohi2.csv while the messages below claimed the
# write was disabled. Commented out to match the stated review workflow
# (consistent with scripts 04 and 06).
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment the write.csv line above to save changes.\n")
cat("\nProcessing complete! AOT and CRT scales calculated (not yet saved to file).\n")
|
||||||
292
eohi2/dataP 06 - time interval differences.r
Normal file
@ -0,0 +1,292 @@
|
|||||||
|
# Script to calculate absolute differences between time intervals in eohi2.csv
# Compares present vs past/future, and 5-year vs 10-year intervals

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data. check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of NA.
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# The 15 item names (same order for all time periods)
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# The "present" columns spell the TV item in lowercase ("pref_tv");
# everything else is identical to `items`
items_present <- sub("pref_TV", "pref_tv", items, fixed = TRUE)

# All 75 source columns: 15 items x 5 time periods
source_cols <- c(
  paste0("present_", items_present),
  paste0("past_5_", items),
  paste0("past_10_", items),
  paste0("fut_5_", items),
  paste0("fut_10_", items)
)

# The 90 target columns: 6 calculation types x 15 items
target_NPast_5 <- paste0("NPast_5_", items)
target_NPast_10 <- paste0("NPast_10_", items)
target_NFut_5 <- paste0("NFut_5_", items)
target_NFut_10 <- paste0("NFut_10_", items)
target_5_10past <- paste0("5.10past_", items)
target_5_10fut <- paste0("5.10fut_", items)

target_cols <- c(
  target_NPast_5,
  target_NPast_10,
  target_NFut_5,
  target_NFut_10,
  target_5_10past,
  target_5_10fut
)
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Actual (trimmed) column names present in the data
df_cols <- trimws(names(df))

# Partition the expected source columns into missing / found
missing_source <- setdiff(source_cols, df_cols)
existing_source <- intersect(source_cols, df_cols)

cat("Source Columns:\n")
cat("  Expected: 75 columns\n")
cat("  Found:", length(existing_source), "columns\n")
cat("  Missing:", length(missing_source), "columns\n")

# List missing columns individually only when the list is short
if (length(missing_source) > 0 && length(missing_source) <= 10) {
  cat("\n  Missing source columns:\n")
  cat(paste("    -", missing_source, "\n"), sep = "")
} else if (length(missing_source) > 10) {
  cat("\n  Too many missing to list individually (", length(missing_source), "missing)\n")
}

# Same partition for the target (difference) columns
missing_targets <- setdiff(target_cols, df_cols)
existing_targets <- intersect(target_cols, df_cols)

cat("\nTarget Columns:\n")
cat("  Expected: 90 columns\n")
cat("  Found:", length(existing_targets), "columns\n")
cat("  Missing:", length(missing_targets), "columns\n")

if (length(missing_targets) > 0) {
  cat("\n  Target columns do NOT exist yet - will create them.\n")
  if (length(existing_targets) > 0) {
    cat("  WARNING: Some target columns already exist and will be overwritten.\n")
  }
} else {
  cat("  All target columns exist - will overwrite with calculated values.\n")
}

cat("\n=== END CHECK ===\n\n")

# Abort when too many of the 75 expected source columns are absent
if (length(missing_source) > 30) {
  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# ============= CALCULATE DIFFERENCES =============
cat("Calculating time interval differences...\n")

# Coerce every source column that actually exists to numeric
num_cols <- intersect(source_cols, names(df))
df[num_cols] <- lapply(df[num_cols], as.numeric)

# Elementwise |col1 - col2| across all rows of df. A column absent from df
# contributes NA, so the result is NA wherever either operand is unavailable.
calc_abs_diff <- function(col1, col2) {
  a <- if (col1 %in% names(df)) df[[col1]] else NA
  b <- if (col2 %in% names(df)) df[[col2]] else NA
  abs(a - b)
}
|
||||||
|
|
||||||
|
# One spec per calculation type: progress message, the 15 target columns,
# and the two source-column vectors whose absolute difference fills them.
diff_specs <- list(
  list(msg = "  Calculating NPast_5 differences (present vs past 5 years)...\n",
       targets = target_NPast_5,
       left = paste0("present_", items_present), right = paste0("past_5_", items)),
  list(msg = "  Calculating NPast_10 differences (present vs past 10 years)...\n",
       targets = target_NPast_10,
       left = paste0("present_", items_present), right = paste0("past_10_", items)),
  list(msg = "  Calculating NFut_5 differences (present vs future 5 years)...\n",
       targets = target_NFut_5,
       left = paste0("present_", items_present), right = paste0("fut_5_", items)),
  list(msg = "  Calculating NFut_10 differences (present vs future 10 years)...\n",
       targets = target_NFut_10,
       left = paste0("present_", items_present), right = paste0("fut_10_", items)),
  list(msg = "  Calculating 5.10past differences (past 5 vs past 10 years)...\n",
       targets = target_5_10past,
       left = paste0("past_5_", items), right = paste0("past_10_", items)),
  list(msg = "  Calculating 5.10fut differences (future 5 vs future 10 years)...\n",
       targets = target_5_10fut,
       left = paste0("fut_5_", items), right = paste0("fut_10_", items))
)

# Create the 90 target columns, 15 per calculation type, in the same order
# as the original per-type loops
for (spec in diff_specs) {
  cat(spec$msg)
  for (i in 1:15) {
    df[[spec$targets[i]]] <- calc_abs_diff(spec$left[i], spec$right[i])
  }
}

cat("\n=== CALCULATION COMPLETE ===\n")
cat("  90 difference columns created.\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW & ITEM CHECK =============
|
||||||
|
# This function can be run multiple times to check different random rows and items
|
||||||
|
|
||||||
|
# QA spot-check for the difference columns: for one row and one item, prints
# the two source values of each of the 6 calculation types, recomputes
# |left - right| by hand, and reports whether it matches the stored target
# (tolerance 0.0001).
#
# Arguments (both optional):
#   row_num  - 1-based row to check; NULL picks a random row
#   item_num - 1-based item index (1..15 into `items`); NULL picks a random item
# Reads globals: df, items, items_present, target_NPast_5/10, target_NFut_5/10,
# target_5_10past, target_5_10fut. No return value; prints only.
qa_check_random_row <- function(row_num = NULL, item_num = NULL) {
  # Pick a random row or use specified row (validated against nrow(df))
  if (is.null(row_num)) {
    random_row <- sample(seq_len(nrow(df)), 1)
    cat("\n========================================\n")
    cat("QA CHECK: Random Row #", random_row, "\n")
  } else {
    if (row_num < 1 || row_num > nrow(df)) {
      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
      return()
    }
    random_row <- row_num
    cat("\n========================================\n")
    cat("QA CHECK: Specified Row #", random_row, "\n")
  }

  # Pick a random item or use specified item (validated against 1..15)
  if (is.null(item_num)) {
    test_item_idx <- sample(1:15, 1)
    cat("Random Item #", test_item_idx, ": ", items[test_item_idx], "\n")
  } else {
    if (item_num < 1 || item_num > 15) {
      cat("ERROR: Item number must be between 1 and 15\n")
      return()
    }
    test_item_idx <- item_num
    cat("Specified Item #", test_item_idx, ": ", items[test_item_idx], "\n")
  }

  cat("========================================\n\n")

  # One entry per calculation type: target column, its two source columns,
  # and a human-readable formula (mirrors the scoring loops above)
  calculations <- list(
    list(name = "NPast_5", target = target_NPast_5[test_item_idx],
         source1 = paste0("present_", items_present[test_item_idx]),
         source2 = paste0("past_5_", items[test_item_idx]),
         desc = "|present - past_5|"),
    list(name = "NPast_10", target = target_NPast_10[test_item_idx],
         source1 = paste0("present_", items_present[test_item_idx]),
         source2 = paste0("past_10_", items[test_item_idx]),
         desc = "|present - past_10|"),
    list(name = "NFut_5", target = target_NFut_5[test_item_idx],
         source1 = paste0("present_", items_present[test_item_idx]),
         source2 = paste0("fut_5_", items[test_item_idx]),
         desc = "|present - fut_5|"),
    list(name = "NFut_10", target = target_NFut_10[test_item_idx],
         source1 = paste0("present_", items_present[test_item_idx]),
         source2 = paste0("fut_10_", items[test_item_idx]),
         desc = "|present - fut_10|"),
    list(name = "5.10past", target = target_5_10past[test_item_idx],
         source1 = paste0("past_5_", items[test_item_idx]),
         source2 = paste0("past_10_", items[test_item_idx]),
         desc = "|past_5 - past_10|"),
    list(name = "5.10fut", target = target_5_10fut[test_item_idx],
         source1 = paste0("fut_5_", items[test_item_idx]),
         source2 = paste0("fut_10_", items[test_item_idx]),
         desc = "|fut_5 - fut_10|")
  )

  for (calc in calculations) {
    cat(sprintf("--- %s ---\n", calc$name))
    cat(sprintf("Formula: %s\n", calc$desc))

    # Source values for this row (NA when a column is absent)
    val1 <- if (calc$source1 %in% names(df)) df[random_row, calc$source1] else NA
    val2 <- if (calc$source2 %in% names(df)) df[random_row, calc$source2] else NA
    target_val <- df[random_row, calc$target]

    cat(sprintf("  %s: %s\n", calc$source1, ifelse(is.na(val1), "NA", as.character(val1))))
    cat(sprintf("  %s: %s\n", calc$source2, ifelse(is.na(val2), "NA", as.character(val2))))

    if (!is.na(val1) && !is.na(val2)) {
      # Recompute the absolute difference and compare float-safely
      expected_diff <- abs(val1 - val2)
      cat(sprintf("\n  Calculation: |%.5f - %.5f| = %.5f\n", val1, val2, expected_diff))
      cat(sprintf("  Target (%s): %.5f\n", calc$target, target_val))
      cat(sprintf("  Match: %s\n", ifelse(abs(expected_diff - target_val) < 0.0001, "YES ✓", "NO ✗")))
    } else {
      cat("  Cannot calculate (missing values)\n")
    }
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run the QA spot-check once on a random row and random item
cat("\n\n")
qa_check_random_row() # Leave blank for random row & item; specify parameters as needed (see examples below)

# Usage examples for re-running the check interactively
cat("\n*** TO CHECK ANOTHER ROW/ITEM ***\nFor random row AND random item, run:\n  qa_check_random_row()\n\nFor specific row (e.g., row 118) with random item:\n  qa_check_random_row(118)\n\nFor random row with specific item (e.g., item 5 = pref_travel):\n  qa_check_random_row(item_num = 5)\n\nFor specific row AND specific item:\n  qa_check_random_row(118, 5)\n\n")

# Save the modified dataframe back to CSV.
# na = "" writes NA values as empty cells instead of the text "NA".
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\nReview the output above, then uncomment line 243 to save changes.\n\nProcessing complete! 90 difference columns calculated (not yet saved to file).\n")
|
||||||
265
eohi2/dataP 07 - domain means.r
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
# Script to calculate domain means for time interval differences in eohi2.csv
# Averages the 5 items within each domain (pref, pers, val) for each time interval type

# Load necessary library
library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data. check.names = FALSE preserves the original column names;
# na.strings = NULL keeps empty cells as empty strings instead of converting to NA.
df <- read.csv("eohi2.csv", stringsAsFactors = FALSE, check.names = FALSE, na.strings = NULL)

# The 15 item names (same order for all time periods)
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# Domain groupings (indices into `items`)
pref_indices <- 1:5
pers_indices <- 6:10
val_indices <- 11:15

# Time interval prefixes
time_prefixes <- c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut")

# Domain names
domain_names <- c("pref", "pers", "val")

# All 90 source columns: each time prefix crossed with each item, in
# time-prefix-major order (same names and order as an explicit listing).
source_cols <- unlist(lapply(time_prefixes, function(p) paste0(p, "_", items)))

# All 18 target columns (6 time intervals x 3 domains), same ordering scheme.
target_cols <- unlist(lapply(time_prefixes, function(p) paste0(p, "_", domain_names, "_MEAN")))
|
||||||
|
|
||||||
|
# ============= TROUBLESHOOTING: CHECK COLUMN EXISTENCE =============
cat("\n=== COLUMN EXISTENCE CHECK ===\n\n")

# Actual column names from the dataframe (trimmed of stray whitespace)
df_cols <- trimws(names(df))

# Source columns: set operations give the same (order-preserving) result as
# the original %in% masks because source_cols has no duplicates.
missing_source <- setdiff(source_cols, df_cols)
existing_source <- intersect(source_cols, df_cols)

cat("Source Columns:\n")
cat(" Expected: 90 columns\n")
cat(" Found:", length(existing_source), "columns\n")
cat(" Missing:", length(missing_source), "columns\n")

if (length(missing_source) > 0 && length(missing_source) <= 20) {
  cat("\n Missing source columns:\n")
  for (col in missing_source) {
    cat(" -", col, "\n")
  }
} else if (length(missing_source) > 20) {
  cat("\n Too many missing to list individually (", length(missing_source), "missing)\n")
}

# Target columns
missing_targets <- setdiff(target_cols, df_cols)
existing_targets <- intersect(target_cols, df_cols)

cat("\nTarget Columns:\n")
cat(" Expected: 18 columns\n")
cat(" Found:", length(existing_targets), "columns\n")
cat(" Missing:", length(missing_targets), "columns\n")

if (length(missing_targets) > 0) {
  cat("\n Target columns do NOT exist yet - will create them.\n")
  if (length(existing_targets) > 0) {
    cat(" WARNING: Some target columns already exist and will be overwritten.\n")
  }
} else {
  cat(" All target columns exist - will overwrite with calculated values.\n")
}

cat("\n=== END CHECK ===\n\n")

# Abort when more than half of the 90 source columns are absent -- the CSV
# almost certainly has different column names than expected.
if (length(missing_source) > 45) {
  stop("ERROR: Too many source columns missing! Please check column names in CSV file.")
}

cat("Proceeding with processing...\n\n")
|
||||||
|
|
||||||
|
# ============= CALCULATE DOMAIN MEANS =============
cat("Calculating domain means for time interval differences...\n")

# Convert source columns to numeric (they may have been read as character
# because the CSV was read with na.strings = NULL).
for (col in source_cols) {
  if (col %in% names(df)) {
    df[[col]] <- as.numeric(df[[col]])
  }
}

# Item indices for each domain, keyed by domain name.
domain_index_map <- list(pref = pref_indices, pers = pers_indices, val = val_indices)

# One mean column per time interval x domain. The inner loop replaces the
# three near-identical copy-pasted pref/pers/val blocks of the original;
# processing order (pref, pers, val within each prefix) is unchanged.
for (time_prefix in time_prefixes) {
  for (domain in names(domain_index_map)) {
    domain_cols <- paste0(time_prefix, "_", items[domain_index_map[[domain]]])
    existing_domain_cols <- domain_cols[domain_cols %in% names(df)]
    if (length(existing_domain_cols) > 0) {
      target <- paste0(time_prefix, "_", domain, "_MEAN")
      # Row-wise mean over the (up to 5) available item columns; NA items ignored.
      df[[target]] <- rowMeans(df[, existing_domain_cols, drop = FALSE], na.rm = TRUE)
      cat(" Processed:", target, "\n")
    }
  }
}

cat("\n=== CALCULATION COMPLETE ===\n")
cat(" 18 domain mean columns created.\n\n")
|
||||||
|
|
||||||
|
|
||||||
|
# ============= QUALITY ASSURANCE: RANDOM ROW & TIME INTERVAL CHECK =============
# This function can be run multiple times to check different random rows and time intervals.
#
# For one row and one time interval, recomputes each domain mean from the raw
# item columns and compares it against the stored *_MEAN column.
#   row_num           - 1-based row to check; NULL picks a random row.
#   time_interval_num - 1..6 index into time_prefixes; NULL picks one at random.
# Relies on globals: df, time_prefixes, domain_names, items, pref/pers/val_indices.

qa_check_random_row <- function(row_num = NULL, time_interval_num = NULL) {
  # Pick a random row or use the specified row
  if (is.null(row_num)) {
    random_row <- sample(seq_len(nrow(df)), 1)
    cat("\n========================================\n")
    cat("QA CHECK: Random Row #", random_row, "\n")
  } else {
    if (row_num < 1 || row_num > nrow(df)) {
      cat("ERROR: Row number must be between 1 and", nrow(df), "\n")
      return()
    }
    random_row <- row_num
    cat("\n========================================\n")
    cat("QA CHECK: Specified Row #", random_row, "\n")
  }

  # Pick a random time interval or use the specified interval
  if (is.null(time_interval_num)) {
    test_interval_idx <- sample(1:6, 1)
    cat("Random Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n")
  } else {
    if (time_interval_num < 1 || time_interval_num > 6) {
      cat("ERROR: Time interval number must be between 1 and 6\n")
      cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5, 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n")
      return()
    }
    test_interval_idx <- time_interval_num
    cat("Specified Time Interval #", test_interval_idx, ": ", time_prefixes[test_interval_idx], "\n")
  }

  cat("========================================\n\n")

  time_prefix <- time_prefixes[test_interval_idx]

  # Check each of the 3 domains
  for (domain_idx in 1:3) {
    domain_name <- domain_names[domain_idx]

    # Map the domain to its item indices and display label (switch replaces
    # the original if/else chain; same mapping).
    item_indices <- switch(domain_name,
                           pref = pref_indices,
                           pers = pers_indices,
                           val = val_indices)
    domain_label <- switch(domain_name,
                           pref = "Preferences",
                           pers = "Personality",
                           val = "Values")

    cat(sprintf("--- %s: %s ---\n", time_prefix, domain_label))

    # Source and target column names for this interval x domain
    source_cols_domain <- paste0(time_prefix, "_", items[item_indices])
    target_col <- paste0(time_prefix, "_", domain_name, "_MEAN")

    # Collect the 5 raw item values (NA when the column is absent)
    values <- numeric(5)
    cat("Source values:\n")
    for (i in 1:5) {
      col <- source_cols_domain[i]
      val <- if (col %in% names(df)) df[random_row, col] else NA
      values[i] <- val
      cat(sprintf(" %s: %s\n", col, ifelse(is.na(val), "NA", sprintf("%.5f", val))))
    }

    # Recompute the expected mean over the non-missing items
    valid_values <- values[!is.na(values)]
    if (length(valid_values) > 0) {
      expected_mean <- mean(valid_values)
      # FIX: guard against a missing target column -- indexing a data.frame
      # with an undefined column name is an error ("undefined columns
      # selected"), not NA, so the original line crashed when the *_MEAN
      # column had not been created yet.
      actual_value <- if (target_col %in% names(df)) df[random_row, target_col] else NA

      cat(sprintf("\nCalculation:\n"))
      cat(sprintf(" Sum: %s = %.5f\n",
                  paste(sprintf("%.5f", valid_values), collapse = " + "),
                  sum(valid_values)))
      cat(sprintf(" Average of %d values: %.5f\n", length(valid_values), expected_mean))
      cat(sprintf(" Target (%s): %.5f\n", target_col, actual_value))
      cat(sprintf(" Match: %s\n", ifelse(abs(expected_mean - actual_value) < 0.0001, "YES ✓", "NO ✗")))
    } else {
      cat(" No valid values to calculate mean.\n")
    }
    cat("\n")
  }

  cat("========================================\n")
  cat("END QA CHECK\n")
  cat("========================================\n\n")
}
|
||||||
|
|
||||||
|
# Run QA check on random row and random time interval
cat("\n\n")
qa_check_random_row() # Leave blank for random row & interval; specify parameters as needed (see examples below)

# Instructions for running additional checks
cat("\n")
cat("*** TO CHECK ANOTHER ROW/TIME INTERVAL ***\n")
cat("For random row AND random time interval, run:\n")
cat(" qa_check_random_row()\n")
cat("\nFor specific row (e.g., row 118) with random interval:\n")
cat(" qa_check_random_row(118)\n")
cat("\nFor random row with specific interval (e.g., 3 = NFut_5):\n")
cat(" qa_check_random_row(time_interval_num = 3)\n")
cat("\nFor specific row AND specific interval:\n")
cat(" qa_check_random_row(118, 3)\n")
cat("\n")
cat("Time Interval Numbers:\n")
cat(" 1 = NPast_5, 2 = NPast_10, 3 = NFut_5\n")
cat(" 4 = NFut_10, 5 = X5.10past, 6 = X5.10fut\n")
cat("\n")

# Save the modified dataframe back to CSV
# na="" writes NA values as empty cells instead of "NA" text
# COMMENTED OUT FOR REVIEW - Uncomment when ready to save
# FIX: write.csv() was left active even though the comment above and all of
# the messages below state the write is commented out; running the script
# would silently overwrite eohi2.csv before review. Commented out to match
# the stated intent.
# write.csv(df, "eohi2.csv", row.names = FALSE, na = "")

cat("\n*** WRITE TO FILE IS COMMENTED OUT ***\n")
cat("Review the output above, then uncomment the write.csv() line above to save changes.\n")
cat("\nProcessing complete! 18 domain mean columns calculated (not yet saved to file).\n")
|
||||||
95
eohi2/dataP 08 - DGEN 510 vars.r
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
# Script 08: Create 5_10 DGEN Variables
# PURPOSE: Calculate absolute differences between 5-year and 10-year DGEN ratings
# for both Past and Future directions
# VARIABLES CREATED: 6 total (3 domains x 2 time directions)

library(tidyverse)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Read the data
data <- read.csv("eohi2.csv")

# FIX: collapse was " x" (produced e.g. "318 x77"); " x " gives readable dims.
print(paste("Dataset dimensions:", paste(dim(data), collapse = " x ")))
print(paste("Number of participants:", length(unique(data$ResponseId))))

# Verify source columns exist
source_vars <- c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
                 "DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
                 "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val",
                 "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val")

missing_vars <- source_vars[!source_vars %in% colnames(data)]
if (length(missing_vars) > 0) {
  stop(paste("ERROR: Missing source variables:", paste(missing_vars, collapse = ", ")))
}

print("All source DGEN variables found!")

# Calculate 5_10 DGEN variables (absolute differences between 5-year and 10-year)
# Formula: |DGEN_5 - DGEN_10|
# NOTE: Using X prefix because R adds it to column names starting with numbers

# PAST direction
data$X5_10DGEN_past_pref <- abs(data$DGEN_past_5_Pref - data$DGEN_past_10_Pref)
data$X5_10DGEN_past_pers <- abs(data$DGEN_past_5_Pers - data$DGEN_past_10_Pers)
data$X5_10DGEN_past_val <- abs(data$DGEN_past_5_Val - data$DGEN_past_10_Val)

# FUTURE direction
data$X5_10DGEN_fut_pref <- abs(data$DGEN_fut_5_Pref - data$DGEN_fut_10_Pref)
data$X5_10DGEN_fut_pers <- abs(data$DGEN_fut_5_Pers - data$DGEN_fut_10_Pers)
data$X5_10DGEN_fut_val <- abs(data$DGEN_fut_5_Val - data$DGEN_fut_10_Val)

# Verify variables were created
target_vars <- c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
                 "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")

# FIX: print("\n...") does not interpret the escape -- it renders the literal
# characters \n inside a quoted [1] "..." line. The section headers below use
# cat() so the intended blank lines actually appear.
cat("\n=== VARIABLES CREATED ===\n")
print(target_vars)

# Check for missing values
for (var in target_vars) {
  n_missing <- sum(is.na(data[[var]]))
  pct_missing <- round(100 * n_missing / nrow(data), 2)
  print(sprintf("%s: %d missing (%.2f%%)", var, n_missing, pct_missing))
}

# Quality check: display recomputed vs stored values for a few random rows
cat("\n=== QUALITY CHECK: Sample Calculations ===\n")
sample_rows <- sample(1:nrow(data), min(5, nrow(data)))

for (i in sample_rows) {
  cat(sprintf("\nRow %d:\n", i))
  print(sprintf(" DGEN_past_5_Pref = %.2f, DGEN_past_10_Pref = %.2f",
                data$DGEN_past_5_Pref[i], data$DGEN_past_10_Pref[i]))
  print(sprintf(" → X5_10DGEN_past_pref = %.2f (expected: %.2f)",
                data$X5_10DGEN_past_pref[i],
                abs(data$DGEN_past_5_Pref[i] - data$DGEN_past_10_Pref[i])))

  print(sprintf(" DGEN_fut_5_Pers = %.2f, DGEN_fut_10_Pers = %.2f",
                data$DGEN_fut_5_Pers[i], data$DGEN_fut_10_Pers[i]))
  print(sprintf(" → X5_10DGEN_fut_pers = %.2f (expected: %.2f)",
                data$X5_10DGEN_fut_pers[i],
                abs(data$DGEN_fut_5_Pers[i] - data$DGEN_fut_10_Pers[i])))
}

# Descriptive statistics
cat("\n=== DESCRIPTIVE STATISTICS ===\n")
desc_stats <- data %>%
  summarise(across(all_of(target_vars),
                   list(n = ~sum(!is.na(.)),
                        mean = ~round(mean(., na.rm = TRUE), 5),
                        sd = ~round(sd(., na.rm = TRUE), 5),
                        min = ~round(min(., na.rm = TRUE), 5),
                        max = ~round(max(., na.rm = TRUE), 5)),
                   .names = "{.col}_{.fn}"))

print(t(desc_stats))

# Save to CSV
write.csv(data, "eohi2.csv", row.names = FALSE)

cat("\n=== PROCESSING COMPLETE ===\n")
print("Data saved to eohi2.csv")
print(paste("Total columns now:", ncol(data)))
|
||||||
|
|
||||||
223
eohi2/dataP 09 - interval x direction means.r
Normal file
@ -0,0 +1,223 @@
|
|||||||
|
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# The 15 items rated at every time interval x direction. Building the column
# vectors from this list replaces ~90 lines of copy-pasted name literals in
# the original; names and order are identical.
items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)

# Helper: the 15 item columns for each prefix in `prefixes`, prefix-major order.
interval_cols <- function(prefixes) {
  unlist(lapply(prefixes, function(p) paste0(p, "_", items)))
}

# Sets 1-6: one mean per interval x direction (15 variables each)
for (p in c("NPast_5", "NPast_10", "NFut_5", "NFut_10", "X5.10past", "X5.10fut")) {
  data[[paste0(p, "_mean")]] <- rowMeans(data[, interval_cols(p)], na.rm = TRUE)
}

# Sets 7-11: global means over pairs of intervals (30 variables each)
global_sets <- list(
  NPast_global_mean = c("NPast_5", "NPast_10"),
  NFut_global_mean  = c("NFut_5", "NFut_10"),
  X5.10_global_mean = c("X5.10past", "X5.10fut"),
  N5_global_mean    = c("NPast_5", "NFut_5"),
  N10_global_mean   = c("NPast_10", "NFut_10")
)
for (nm in names(global_sets)) {
  data[[nm]] <- rowMeans(data[, interval_cols(global_sets[[nm]])], na.rm = TRUE)
}

# Save the data
write.csv(data, "eohi2.csv", row.names = FALSE)
|
||||||
|
|
||||||
|
# ===== QA CODE: Check first 5 rows =====
cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n")

# Rebuild the column lists used for each derived mean so every stored value
# can be recomputed independently. The data-driven loop replaces the 11
# copy-pasted calc1..calc11 chunks of the original; output is identical.
qa_items <- c(
  "pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
  "pers_extravert", "pers_critical", "pers_dependable", "pers_anxious", "pers_complex",
  "val_obey", "val_trad", "val_opinion", "val_performance", "val_justice"
)
qa_cols <- function(prefixes) {
  unlist(lapply(prefixes, function(p) paste0(p, "_", qa_items)))
}
# Derived-mean column -> the interval prefix(es) it averages over.
qa_sets <- list(
  NPast_5_mean      = "NPast_5",
  NPast_10_mean     = "NPast_10",
  NFut_5_mean       = "NFut_5",
  NFut_10_mean      = "NFut_10",
  X5.10past_mean    = "X5.10past",
  X5.10fut_mean     = "X5.10fut",
  NPast_global_mean = c("NPast_5", "NPast_10"),
  NFut_global_mean  = c("NFut_5", "NFut_10"),
  X5.10_global_mean = c("X5.10past", "X5.10fut"),
  N5_global_mean    = c("NPast_5", "NFut_5"),
  N10_global_mean   = c("NPast_10", "NFut_10")
)

for (i in 1:min(5, nrow(data))) {
  cat("--- Row", i, "---\n")
  for (set_name in names(qa_sets)) {
    # Recompute the row mean from the raw columns and compare to the stored value.
    calc <- mean(as.numeric(data[i, qa_cols(qa_sets[[set_name]])]), na.rm = TRUE)
    stored <- data[[set_name]][i]
    cat(paste0(set_name, ": Calculated ="), calc, "| Stored =", stored,
        "| Match:", isTRUE(all.equal(calc, stored)), "\n")
  }
  cat("\n")
}

cat("=== QA CHECK COMPLETE ===\n")
|
||||||
115
eohi2/dataP 10 - DGEN mean vars.r
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
# dataP 10 - DGEN mean vars
# Builds seven DGEN composite mean variables from eohi2.csv, verifies the
# first five rows against independently re-computed means, and writes the
# augmented dataset back to eohi2.csv.

options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# One entry per composite: target column name -> component column names.
# Defining the specification once removes the duplication between the
# computation code and the QA code (previously every column list was
# written out twice and could silently drift apart).
dgen_sets <- list(
  # Set 1: DGEN_past_5.10_mean (3 variables)
  DGEN_past_5.10_mean = c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val"),
  # Set 2: DGEN_fut_5.10_mean (3 variables)
  DGEN_fut_5.10_mean = c("X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val"),
  # Set 3: DGENpast_global_mean (9 variables)
  DGENpast_global_mean = c("DGEN_past_5_Pref", "DGEN_past_10_Pref", "X5_10DGEN_past_pref",
                           "DGEN_past_5_Pers", "DGEN_past_10_Pers", "X5_10DGEN_past_pers",
                           "DGEN_past_5_Val", "DGEN_past_10_Val", "X5_10DGEN_past_val"),
  # Set 4: DGENfut_global_mean (9 variables)
  DGENfut_global_mean = c("DGEN_fut_5_Pref", "DGEN_fut_10_Pref", "X5_10DGEN_fut_pref",
                          "DGEN_fut_5_Pers", "DGEN_fut_10_Pers", "X5_10DGEN_fut_pers",
                          "DGEN_fut_5_Val", "DGEN_fut_10_Val", "X5_10DGEN_fut_val"),
  # Set 5: DGEN_5_global_mean (6 variables)
  DGEN_5_global_mean = c("DGEN_past_5_Pref", "DGEN_past_5_Pers", "DGEN_past_5_Val",
                         "DGEN_fut_5_Pref", "DGEN_fut_5_Pers", "DGEN_fut_5_Val"),
  # Set 6: DGEN_10_global_mean (6 variables)
  DGEN_10_global_mean = c("DGEN_past_10_Pref", "DGEN_past_10_Pers", "DGEN_past_10_Val",
                          "DGEN_fut_10_Pref", "DGEN_fut_10_Pers", "DGEN_fut_10_Val"),
  # Set 7: DGEN_5.10_global_mean (6 variables)
  DGEN_5.10_global_mean = c("X5_10DGEN_past_pref", "X5_10DGEN_past_pers", "X5_10DGEN_past_val",
                            "X5_10DGEN_fut_pref", "X5_10DGEN_fut_pers", "X5_10DGEN_fut_val")
)

# Compute each composite as the row-wise mean of its components, ignoring
# missing values (rowMeans with na.rm = TRUE yields NaN for all-NA rows,
# matching the original per-set calls).
for (target in names(dgen_sets)) {
  data[[target]] <- rowMeans(data[, dgen_sets[[target]]], na.rm = TRUE)
}

# Save the data (NOTE(review): this overwrites the input file in place,
# as the original script did).
write.csv(data, "eohi2.csv", row.names = FALSE)

# ===== QA CODE: Check first 5 rows =====
cat("\n=== QUALITY ASSURANCE: Checking calculations for first 5 rows ===\n\n")

for (i in seq_len(min(5, nrow(data)))) {
  cat("--- Row", i, "---\n")
  for (target in names(dgen_sets)) {
    # Recompute the mean independently from the raw component columns
    # for this single row, exactly as the original per-set QA did.
    calc <- mean(as.numeric(data[i, dgen_sets[[target]]]), na.rm = TRUE)
    cat(paste0(target, ":"), "Calculated =", calc, "| Stored =", data[[target]][i],
        "| Match:", isTRUE(all.equal(calc, data[[target]][i])), "\n")
  }
  cat("\n")
}

cat("=== QA CHECK COMPLETE ===\n")
|
||||||
235
eohi2/dataP 11 - CORRECT ehi vars.r
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
# dataP 11 - CORRECT ehi vars
# Creates EHI difference scores (past minus future) for every item in the
# 5-year, 10-year, and 5-10-year intervals of eohi2.csv, QA-checks them,
# and writes the augmented dataset back to eohi2.csv.

options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Item stems shared by every time interval
# (5 preferences, 5 personality items, 5 values).
items <- c("pref_read", "pref_music", "pref_TV", "pref_nap", "pref_travel",
           "pers_extravert", "pers_critical", "pers_dependable",
           "pers_anxious", "pers_complex",
           "val_obey", "val_trad", "val_opinion", "val_performance",
           "val_justice")

# Build past/future/target column-name triples for one time interval.
make_pairs <- function(past_fmt, fut_fmt, target_fmt) {
  lapply(items, function(it) {
    list(npast = sprintf(past_fmt, it),
         nfut = sprintf(fut_fmt, it),
         target = sprintf(target_fmt, it))
  })
}

# The full pair specification, defined ONCE.  Both the computation and the
# QA below are driven from this list, so the 45 difference variables and
# the 45 QA checks cannot drift apart (previously each assignment and each
# qa_pairs entry was written out by hand in two separate places).
qa_pairs <- c(
  make_pairs("NPast_5_%s", "NFut_5_%s", "ehi5_%s"),        # 5-year pairs
  make_pairs("NPast_10_%s", "NFut_10_%s", "ehi10_%s"),     # 10-year pairs
  make_pairs("X5.10past_%s", "X5.10fut_%s", "ehi5.10_%s")  # 5-10 year change pairs
)

# Compute every EHI difference variable (past minus future).
for (pair in qa_pairs) {
  data[[pair$target]] <- data[[pair$npast]] - data[[pair$nfut]]
}

# QA: detailed spot check of three representative items for the first
# 5 rows of each interval, matching the original hand-written output.
spot_items <- c("pref_read", "pref_music", "pers_extravert")

print_spot_checks <- function(past_fmt, fut_fmt, target_fmt) {
  for (i in 1:5) {
    cat(sprintf("\nRow %d:\n", i))
    for (it in spot_items) {
      past_v <- data[[sprintf(past_fmt, it)]][i]
      fut_v <- data[[sprintf(fut_fmt, it)]][i]
      tname <- sprintf(target_fmt, it)
      stored <- data[[tname]][i]
      mark <- ifelse(abs((past_v - fut_v) - stored) < 1e-10, "\u2713", "\u2717")
      cat(sprintf("  %s: %g - %g = %g | %s = %g %s\n",
                  it, past_v, fut_v, past_v - fut_v, tname, stored, mark))
    }
  }
}

cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")

cat("--- 5-YEAR VARIABLES ---\n")
print_spot_checks("NPast_5_%s", "NFut_5_%s", "ehi5_%s")

cat("\n--- 10-YEAR VARIABLES ---\n")
print_spot_checks("NPast_10_%s", "NFut_10_%s", "ehi10_%s")

cat("\n--- 5-10 YEAR CHANGE VARIABLES ---\n")
print_spot_checks("X5.10past_%s", "X5.10fut_%s", "ehi5.10_%s")

# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

all_checks_passed <- TRUE

for (pair in qa_pairs) {
  # Expected difference, recomputed from the raw columns.
  expected_diff <- data[[pair$npast]] - data[[pair$nfut]]

  # Stored value in the target variable.
  actual_value <- data[[pair$target]]

  # Compare with a tolerance for floating point precision; which() drops
  # NAs, so rows with missing inputs are not flagged (same behavior as
  # the original check).
  discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf("  Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))

    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf("  Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
                row_num,
                pair$npast, data[[pair$npast]][row_num],
                pair$nfut, data[[pair$nfut]][row_num],
                expected_diff[row_num],
                pair$target, actual_value[row_num]))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")
|
||||||
118
eohi2/datap 12 - CORRECT DGEN ehi vars.r
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
# datap 12 - CORRECT DGEN ehi vars
# Creates DGEN EHI difference scores (past minus future) for the 5-year
# and 10-year Pref/Pers/Val composites, QA-checks them, and writes the
# augmented dataset back to eohi2.csv.

options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# One entry per difference variable.  Both the computation and the QA
# below are driven from this single list, so they cannot drift apart
# (previously the six assignments and the QA list were maintained
# separately by hand).
qa_pairs <- list(
  # 5-year DGEN pairs
  list(npast = "DGEN_past_5_Pref", nfut = "DGEN_fut_5_Pref", target = "ehiDGEN_5_Pref"),
  list(npast = "DGEN_past_5_Pers", nfut = "DGEN_fut_5_Pers", target = "ehiDGEN_5_Pers"),
  list(npast = "DGEN_past_5_Val", nfut = "DGEN_fut_5_Val", target = "ehiDGEN_5_Val"),

  # 10-year DGEN pairs
  list(npast = "DGEN_past_10_Pref", nfut = "DGEN_fut_10_Pref", target = "ehiDGEN_10_Pref"),
  list(npast = "DGEN_past_10_Pers", nfut = "DGEN_fut_10_Pers", target = "ehiDGEN_10_Pers"),
  list(npast = "DGEN_past_10_Val", nfut = "DGEN_fut_10_Val", target = "ehiDGEN_10_Val")
)

# Compute every DGEN EHI difference variable (past minus future).
for (pair in qa_pairs) {
  data[[pair$target]] <- data[[pair$npast]] - data[[pair$nfut]]
}

# QA: detailed spot check for the first 5 rows of a group of pairs,
# matching the original hand-written output.
print_spot_checks <- function(pairs) {
  for (i in 1:5) {
    cat(sprintf("\nRow %d:\n", i))
    for (pair in pairs) {
      past_v <- data[[pair$npast]][i]
      fut_v <- data[[pair$nfut]][i]
      stored <- data[[pair$target]][i]
      mark <- ifelse(abs((past_v - fut_v) - stored) < 1e-10, "\u2713", "\u2717")
      # Label the line with the domain suffix (Pref/Pers/Val).
      dom <- sub(".*_", "", pair$target)
      cat(sprintf("  %s: %g - %g = %g | %s = %g %s\n",
                  dom, past_v, fut_v, past_v - fut_v, pair$target, stored, mark))
    }
  }
}

cat("\n=== QUALITY ASSURANCE CHECK - FIRST 5 ROWS ===\n\n")

cat("--- 5-YEAR DGEN VARIABLES ---\n")
print_spot_checks(qa_pairs[1:3])

cat("\n--- 10-YEAR DGEN VARIABLES ---\n")
print_spot_checks(qa_pairs[4:6])

# Full QA check for all rows and all variables
cat("\n\n=== OVERALL QA CHECK (ALL ROWS, ALL VARIABLES) ===\n")

all_checks_passed <- TRUE

for (pair in qa_pairs) {
  # Expected difference, recomputed from the raw columns.
  expected_diff <- data[[pair$npast]] - data[[pair$nfut]]

  # Stored value in the target variable.
  actual_value <- data[[pair$target]]

  # Compare with a tolerance for floating point precision; which() drops
  # NAs, so rows with missing inputs are not flagged (same behavior as
  # the original check).
  discrepancies <- which(abs(expected_diff - actual_value) > 1e-10)

  if (length(discrepancies) > 0) {
    cat(sprintf("FAIL: %s\n", pair$target))
    cat(sprintf("  Found %d discrepancies in rows: %s\n",
                length(discrepancies),
                paste(head(discrepancies, 10), collapse = ", ")))

    # Show first discrepancy details
    row_num <- discrepancies[1]
    cat(sprintf("  Example (row %d): %s (%g) - %s (%g) = %g, but %s = %g\n",
                row_num,
                pair$npast, data[[pair$npast]][row_num],
                pair$nfut, data[[pair$nfut]][row_num],
                expected_diff[row_num],
                pair$target, actual_value[row_num]))
    all_checks_passed <- FALSE
  } else {
    cat(sprintf("PASS: %s (n = %d)\n", pair$target, nrow(data)))
  }
}

cat("\n")
if (all_checks_passed) {
  cat("*** ALL QA CHECKS PASSED ***\n")
} else {
  cat("*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")
}

# Save updated dataset
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")
|
||||||
161
eohi2/datap 13 - ehi domain specific means.r
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
options(scipen = 999)
|
||||||
|
setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")
|
||||||
|
|
||||||
|
# Load data
|
||||||
|
data <- read.csv("eohi2.csv")
|
||||||
|
|
||||||
|
# Calculate domain-specific mean scores for EHI variables across time intervals
|
||||||
|
|
||||||
|
# === 5-YEAR MEANS ===
|
||||||
|
data$ehi5_pref_MEAN <- rowMeans(data[, c("ehi5_pref_read", "ehi5_pref_music",
|
||||||
|
"ehi5_pref_TV", "ehi5_pref_nap",
|
||||||
|
"ehi5_pref_travel")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi5_pers_MEAN <- rowMeans(data[, c("ehi5_pers_extravert", "ehi5_pers_critical",
|
||||||
|
"ehi5_pers_dependable", "ehi5_pers_anxious",
|
||||||
|
"ehi5_pers_complex")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi5_val_MEAN <- rowMeans(data[, c("ehi5_val_obey", "ehi5_val_trad",
|
||||||
|
"ehi5_val_opinion", "ehi5_val_performance",
|
||||||
|
"ehi5_val_justice")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# === 10-YEAR MEANS ===
|
||||||
|
data$ehi10_pref_MEAN <- rowMeans(data[, c("ehi10_pref_read", "ehi10_pref_music",
|
||||||
|
"ehi10_pref_TV", "ehi10_pref_nap",
|
||||||
|
"ehi10_pref_travel")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi10_pers_MEAN <- rowMeans(data[, c("ehi10_pers_extravert", "ehi10_pers_critical",
|
||||||
|
"ehi10_pers_dependable", "ehi10_pers_anxious",
|
||||||
|
"ehi10_pers_complex")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi10_val_MEAN <- rowMeans(data[, c("ehi10_val_obey", "ehi10_val_trad",
|
||||||
|
"ehi10_val_opinion", "ehi10_val_performance",
|
||||||
|
"ehi10_val_justice")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# === 5-10 YEAR CHANGE MEANS ===
|
||||||
|
data$ehi5.10_pref_MEAN <- rowMeans(data[, c("ehi5.10_pref_read", "ehi5.10_pref_music",
|
||||||
|
"ehi5.10_pref_TV", "ehi5.10_pref_nap",
|
||||||
|
"ehi5.10_pref_travel")], na.rm = TRUE)
|
||||||
|
|
||||||
|
data$ehi5.10_pers_MEAN <- rowMeans(data[, c("ehi5.10_pers_extravert", "ehi5.10_pers_critical",
|
||||||
|
"ehi5.10_pers_dependable", "ehi5.10_pers_anxious",
|
||||||
|
"ehi5.10_pers_complex")], na.rm = TRUE)
|
||||||
|
|
||||||
|
# === 5-10 YEAR CHANGE: VALUES MEAN ===
# Row-wise mean of the five 5-10-year change value items (NAs ignored).
data$ehi5.10_val_MEAN <- rowMeans(
  data[, c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion",
           "ehi5.10_val_performance", "ehi5.10_val_justice")],
  na.rm = TRUE)

# QA: Verify mean calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI domain-specific mean calculations\n\n")

# Spot-check helper: for each of the first five rows, recompute the mean of
# the component items and compare it to the stored derived column, printing
# one line per row with a ✓/✗ verdict.
preview_first_rows <- function(header, items, target) {
  cat(header)
  for (row in 1:5) {
    components <- vapply(items, function(v) data[[v]][row], numeric(1))
    recomputed <- mean(components, na.rm = TRUE)
    stored     <- data[[target]][row]
    ok <- abs(recomputed - stored) < 1e-10
    cat(sprintf("Row %d: [%g, %g, %g, %g, %g] → Calculated: %.5f | Actual: %.5f %s\n",
                row, components[1], components[2], components[3],
                components[4], components[5],
                recomputed, stored, ifelse(ok, "✓", "✗")))
  }
}

preview_first_rows("--- FIRST 5 ROWS: 5-YEAR PREFERENCES MEAN ---\n",
                   c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
                     "ehi5_pref_nap", "ehi5_pref_travel"),
                   "ehi5_pref_MEAN")

preview_first_rows("\n--- FIRST 5 ROWS: 5-YEAR PERSONALITY MEAN ---\n",
                   c("ehi5_pers_extravert", "ehi5_pers_critical",
                     "ehi5_pers_dependable", "ehi5_pers_anxious",
                     "ehi5_pers_complex"),
                   "ehi5_pers_MEAN")

preview_first_rows("\n--- FIRST 5 ROWS: 10-YEAR PREFERENCES MEAN ---\n",
                   c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV",
                     "ehi10_pref_nap", "ehi10_pref_travel"),
                   "ehi10_pref_MEAN")

preview_first_rows("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE PREFERENCES MEAN ---\n",
                   c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV",
                     "ehi5.10_pref_nap", "ehi5.10_pref_travel"),
                   "ehi5.10_pref_MEAN")

# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Small constructor so each check below reads compactly.
qa_spec <- function(vars, target, name) list(vars = vars, target = target, name = name)

qa_checks <- list(
  # 5-year means
  qa_spec(c("ehi5_pref_read", "ehi5_pref_music", "ehi5_pref_TV",
            "ehi5_pref_nap", "ehi5_pref_travel"),
          "ehi5_pref_MEAN", "5-Year Preferences"),
  qa_spec(c("ehi5_pers_extravert", "ehi5_pers_critical", "ehi5_pers_dependable",
            "ehi5_pers_anxious", "ehi5_pers_complex"),
          "ehi5_pers_MEAN", "5-Year Personality"),
  qa_spec(c("ehi5_val_obey", "ehi5_val_trad", "ehi5_val_opinion",
            "ehi5_val_performance", "ehi5_val_justice"),
          "ehi5_val_MEAN", "5-Year Values"),

  # 10-year means
  qa_spec(c("ehi10_pref_read", "ehi10_pref_music", "ehi10_pref_TV",
            "ehi10_pref_nap", "ehi10_pref_travel"),
          "ehi10_pref_MEAN", "10-Year Preferences"),
  qa_spec(c("ehi10_pers_extravert", "ehi10_pers_critical", "ehi10_pers_dependable",
            "ehi10_pers_anxious", "ehi10_pers_complex"),
          "ehi10_pers_MEAN", "10-Year Personality"),
  qa_spec(c("ehi10_val_obey", "ehi10_val_trad", "ehi10_val_opinion",
            "ehi10_val_performance", "ehi10_val_justice"),
          "ehi10_val_MEAN", "10-Year Values"),

  # 5-10 year change means
  qa_spec(c("ehi5.10_pref_read", "ehi5.10_pref_music", "ehi5.10_pref_TV",
            "ehi5.10_pref_nap", "ehi5.10_pref_travel"),
          "ehi5.10_pref_MEAN", "5-10 Year Change Preferences"),
  qa_spec(c("ehi5.10_pers_extravert", "ehi5.10_pers_critical", "ehi5.10_pers_dependable",
            "ehi5.10_pers_anxious", "ehi5.10_pers_complex"),
          "ehi5.10_pers_MEAN", "5-10 Year Change Personality"),
  qa_spec(c("ehi5.10_val_obey", "ehi5.10_val_trad", "ehi5.10_val_opinion",
            "ehi5.10_val_performance", "ehi5.10_val_justice"),
          "ehi5.10_val_MEAN", "5-10 Year Change Values")
)

all_checks_passed <- TRUE

for (check in qa_checks) {
  expected <- rowMeans(data[, check$vars], na.rm = TRUE)
  stored   <- data[[check$target]]
  # NOTE(review): which() drops NA comparisons, so rows where either side is
  # NA/NaN are not flagged — same as the original behavior.
  bad_rows <- which(abs(expected - stored) > 1e-10)

  if (length(bad_rows) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                check$name, length(check$vars), nrow(data)))
  } else {
    cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars)))
    cat(sprintf(" Found %d discrepancies in rows: %s\n",
                length(bad_rows), paste(head(bad_rows, 10), collapse = ", ")))
    all_checks_passed <- FALSE
  }
}

cat("\n")
cat(if (all_checks_passed) "*** ALL QA CHECKS PASSED ***\n"
    else "*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")

# Save updated dataset
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")
|
||||||
140
eohi2/datap 14 - all ehi global means.r
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

# Load data
data <- read.csv("eohi2.csv")

# Calculate global mean scores for EHI variables across time intervals.
# Each global mean is the row-wise mean of three component columns (NAs ignored).

# === DGEN 5-YEAR GLOBAL MEAN ===
data$ehiDGEN_5_mean <- rowMeans(
  data[, c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val")], na.rm = TRUE)

# === DGEN 10-YEAR GLOBAL MEAN ===
data$ehiDGEN_10_mean <- rowMeans(
  data[, c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val")], na.rm = TRUE)

# === 5-YEAR GLOBAL MEAN ===
data$ehi5_global_mean <- rowMeans(
  data[, c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN")], na.rm = TRUE)

# === 10-YEAR GLOBAL MEAN ===
data$ehi10_global_mean <- rowMeans(
  data[, c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN")], na.rm = TRUE)

# === 5-10 YEAR CHANGE GLOBAL MEAN ===
data$ehi5.10_global_mean <- rowMeans(
  data[, c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN")], na.rm = TRUE)

# QA: Verify mean calculations
cat("\n=== QUALITY ASSURANCE CHECK ===\n")
cat("Verifying EHI global mean calculations\n\n")

# Spot-check helper: recompute each of the first five rows' global mean from
# its three components and compare against the stored column, printing ✓/✗.
# value_fmt controls how the raw components are rendered ("%g" or "%.5f").
preview_first_rows <- function(header, items, target, value_fmt) {
  cat(header)
  for (row in 1:5) {
    components <- vapply(items, function(v) data[[v]][row], numeric(1))
    shown      <- paste(sprintf(value_fmt, components), collapse = ", ")
    recomputed <- mean(components, na.rm = TRUE)
    stored     <- data[[target]][row]
    ok <- abs(recomputed - stored) < 1e-10
    cat(sprintf("Row %d: [%s] → Calculated: %.5f | Actual: %.5f %s\n",
                row, shown, recomputed, stored, ifelse(ok, "✓", "✗")))
  }
}

preview_first_rows("--- FIRST 5 ROWS: DGEN 5-YEAR GLOBAL MEAN ---\n",
                   c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
                   "ehiDGEN_5_mean", "%g")

preview_first_rows("\n--- FIRST 5 ROWS: DGEN 10-YEAR GLOBAL MEAN ---\n",
                   c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
                   "ehiDGEN_10_mean", "%g")

preview_first_rows("\n--- FIRST 5 ROWS: 5-YEAR GLOBAL MEAN ---\n",
                   c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
                   "ehi5_global_mean", "%.5f")

preview_first_rows("\n--- FIRST 5 ROWS: 10-YEAR GLOBAL MEAN ---\n",
                   c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
                   "ehi10_global_mean", "%.5f")

preview_first_rows("\n--- FIRST 5 ROWS: 5-10 YEAR CHANGE GLOBAL MEAN ---\n",
                   c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"),
                   "ehi5.10_global_mean", "%.5f")

# Overall QA check for all rows
cat("\n--- OVERALL QA CHECK (ALL ROWS) ---\n")

# Small constructor so each check below reads compactly.
qa_spec <- function(vars, target, name) list(vars = vars, target = target, name = name)

qa_checks <- list(
  # DGEN global means
  qa_spec(c("ehiDGEN_5_Pref", "ehiDGEN_5_Pers", "ehiDGEN_5_Val"),
          "ehiDGEN_5_mean", "DGEN 5-Year Global"),
  qa_spec(c("ehiDGEN_10_Pref", "ehiDGEN_10_Pers", "ehiDGEN_10_Val"),
          "ehiDGEN_10_mean", "DGEN 10-Year Global"),

  # Domain-specific global means
  qa_spec(c("ehi5_pref_MEAN", "ehi5_pers_MEAN", "ehi5_val_MEAN"),
          "ehi5_global_mean", "5-Year Global"),
  qa_spec(c("ehi10_pref_MEAN", "ehi10_pers_MEAN", "ehi10_val_MEAN"),
          "ehi10_global_mean", "10-Year Global"),
  qa_spec(c("ehi5.10_pref_MEAN", "ehi5.10_pers_MEAN", "ehi5.10_val_MEAN"),
          "ehi5.10_global_mean", "5-10 Year Change Global")
)

all_checks_passed <- TRUE

for (check in qa_checks) {
  expected <- rowMeans(data[, check$vars], na.rm = TRUE)
  stored   <- data[[check$target]]
  # NOTE(review): which() drops NA comparisons, so rows where either side is
  # NA/NaN are not flagged — same as the original behavior.
  bad_rows <- which(abs(expected - stored) > 1e-10)

  if (length(bad_rows) == 0) {
    cat(sprintf("PASS: %s mean (n_vars = %d, n_rows = %d)\n",
                check$name, length(check$vars), nrow(data)))
  } else {
    cat(sprintf("FAIL: %s mean (n_vars = %d)\n", check$name, length(check$vars)))
    cat(sprintf(" Found %d discrepancies in rows: %s\n",
                length(bad_rows), paste(head(bad_rows, 10), collapse = ", ")))
    all_checks_passed <- FALSE
  }
}

cat("\n")
cat(if (all_checks_passed) "*** ALL QA CHECKS PASSED ***\n"
    else "*** SOME QA CHECKS FAILED - REVIEW ABOVE ***\n")

# Save updated dataset
write.csv(data, "eohi2.csv", row.names = FALSE)
cat("\nDataset saved to eohi2.csv\n")
|
||||||
38
eohi2/datap 15 - education recoded ordinal 3.r
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

data <- read.csv("eohi2.csv")

# Check the levels of the demo_edu variable
print(levels(factor(data$demo_edu)))

# Also show the unique values and their frequencies.
# FIX: the original used print("\n...") — print() does not interpret escape
# sequences, so it emitted a literal `\n` and surrounding quotes; cat() renders
# the newline as intended.
cat("\nUnique values and frequencies:\n")
print(table(data$demo_edu, useNA = "ifany"))

# Recode demo_edu into 3 ordinal levels:
#   HS_TS     - High School and Trade School
#   C_Ug      - College and University undergraduate
#   grad_prof - Graduate (Masters), PhD, and professional degrees
# Any demo_edu value not listed in the table below (including NA) stays NA,
# matching the original %in%-based assignments.
edu3_map <- c(
  "High School (or equivalent)"     = "HS_TS",
  "Trade School (non-military)"     = "HS_TS",
  "College Diploma/Certificate"     = "C_Ug",
  "University - Undergraduate"      = "C_Ug",
  "University - Graduate (Masters)" = "grad_prof",
  "University - PhD"                = "grad_prof",
  "Professional Degree (ex. JD/MD)" = "grad_prof"
)

# as.character() guards against demo_edu arriving as a factor, in which case
# indexing edu3_map by it would use the factor's integer codes, not its labels.
data$edu3 <- factor(unname(edu3_map[as.character(data$demo_edu)]),
                    levels = c("HS_TS", "C_Ug", "grad_prof"),
                    ordered = TRUE)

# Check the recoded variable
print(table(data$edu3, useNA = "ifany"))

# Verify the recoding (cross-tabulate original against recoded values)
print(table(data$demo_edu, data$edu3, useNA = "ifany"))

# Save the updated dataset with the new edu3 variable
write.csv(data, "eohi2.csv", row.names = FALSE)
|
||||||
100
eohi2/datap 16 - ehi vars standardized .r
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi2")

df <- read.csv("eohi2.csv")

# Display means and standard deviations of non-standardized variables for manual checking
print(round(mean(df$ehiDGEN_5_mean, na.rm = TRUE), 5))
print(round(sd(df$ehiDGEN_5_mean, na.rm = TRUE), 5))
print(round(mean(df$ehiDGEN_10_mean, na.rm = TRUE), 5))
print(round(sd(df$ehiDGEN_10_mean, na.rm = TRUE), 5))
print(round(mean(df$ehi5_global_mean, na.rm = TRUE), 5))
print(round(sd(df$ehi5_global_mean, na.rm = TRUE), 5))
print(round(mean(df$ehi10_global_mean, na.rm = TRUE), 5))
print(round(sd(df$ehi10_global_mean, na.rm = TRUE), 5))

# Calculate means and standard deviations for standardization
mean_DGEN_5 <- mean(df$ehiDGEN_5_mean, na.rm = TRUE)
sd_DGEN_5 <- sd(df$ehiDGEN_5_mean, na.rm = TRUE)

mean_DGEN_10 <- mean(df$ehiDGEN_10_mean, na.rm = TRUE)
sd_DGEN_10 <- sd(df$ehiDGEN_10_mean, na.rm = TRUE)

mean_DS_5 <- mean(df$ehi5_global_mean, na.rm = TRUE)
sd_DS_5 <- sd(df$ehi5_global_mean, na.rm = TRUE)

mean_DS_10 <- mean(df$ehi10_global_mean, na.rm = TRUE)
sd_DS_10 <- sd(df$ehi10_global_mean, na.rm = TRUE)

# Create standardized variables (z-scores: subtract sample mean, divide by sample SD)
df$stdDGEN_5 <- (df$ehiDGEN_5_mean - mean_DGEN_5) / sd_DGEN_5
df$stdDGEN_10 <- (df$ehiDGEN_10_mean - mean_DGEN_10) / sd_DGEN_10
df$stdDS_5 <- (df$ehi5_global_mean - mean_DS_5) / sd_DS_5
df$stdDS_10 <- (df$ehi10_global_mean - mean_DS_10) / sd_DS_10

# Check that variables have been standardized (means ≈ 0, SDs ≈ 1)
print(round(mean(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_5, na.rm = TRUE), 5))
print(round(mean(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(sd(df$stdDGEN_10, na.rm = TRUE), 5))
print(round(mean(df$stdDS_5, na.rm = TRUE), 5))
print(round(sd(df$stdDS_5, na.rm = TRUE), 5))
print(round(mean(df$stdDS_10, na.rm = TRUE), 5))
print(round(sd(df$stdDS_10, na.rm = TRUE), 5))

# Calculate mean of standardized variables
df$stdEHI_mean <- rowMeans(df[, c("stdDGEN_5", "stdDGEN_10", "stdDS_5", "stdDS_10")], na.rm = TRUE)

#### check random 10 rows

# Check 10 random rows to verify calculations
set.seed(123) # For reproducible random selection
random_rows <- sample(nrow(df), 10)

cat("Checking 10 random rows:\n")
cat("Row | ehiDGEN_5_mean | stdDGEN_5 | Calculation | ehiDGEN_10_mean | stdDGEN_10 | Calculation\n")
cat("----|----------------|-----------|-------------|-----------------|------------|------------\n")

for(i in random_rows) {
  orig_5 <- df$ehiDGEN_5_mean[i]
  std_5 <- df$stdDGEN_5[i]
  calc_5 <- (orig_5 - mean_DGEN_5) / sd_DGEN_5

  orig_10 <- df$ehiDGEN_10_mean[i]
  std_10 <- df$stdDGEN_10[i]
  calc_10 <- (orig_10 - mean_DGEN_10) / sd_DGEN_10

  cat(sprintf("%3d | %13.5f | %9.5f | %11.5f | %15.5f | %10.5f | %11.5f\n",
              i, orig_5, std_5, calc_5, orig_10, std_10, calc_10))
}

cat("\nRow | ehi5_global_mean | stdDS_5 | Calculation | ehi10_global_mean | stdDS_10 | Calculation\n")
cat("----|------------------|---------|-------------|-------------------|----------|------------\n")

for(i in random_rows) {
  orig_5 <- df$ehi5_global_mean[i]
  std_5 <- df$stdDS_5[i]
  calc_5 <- (orig_5 - mean_DS_5) / sd_DS_5

  orig_10 <- df$ehi10_global_mean[i]
  std_10 <- df$stdDS_10[i]
  calc_10 <- (orig_10 - mean_DS_10) / sd_DS_10

  cat(sprintf("%3d | %16.5f | %8.5f | %11.5f | %17.5f | %9.5f | %11.5f\n",
              i, orig_5, std_5, calc_5, orig_10, std_10, calc_10))
}

# Show the final stdEHI_mean for these rows
cat("\nRow | stdEHI_mean | Manual calc\n")
cat("----|-------------|------------\n")
for(i in random_rows) {
  # FIX: the original assigned a corrupted string of hard-coded numeric
  # literals here ("-0.042564413 -0.158849227 ..."), which collapsed into a
  # single arithmetic expression (their sum) that ignored row i entirely,
  # while the intended mean(...) sat on the next line with its result
  # discarded. The manual check must recompute the row mean of the four
  # standardized scores, mirroring the rowMeans(..., na.rm = TRUE) above.
  manual_mean <- mean(c(df$stdDGEN_5[i], df$stdDGEN_10[i],
                        df$stdDS_5[i], df$stdDS_10[i]), na.rm = TRUE)
  cat(sprintf("%3d | %11.5f | %11.5f\n", i, df$stdEHI_mean[i], manual_mean))
}

# Write to CSV
write.csv(df, "eohi2.csv", row.names = FALSE)
|
||||||
24
eohi2/descriptive_statistics.csv
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
|
||||||
|
"ehiDGEN_5_Pref",1,489,0.06339,1.9954,0,0.03308,1.4826,-9,9,18,0.27576,4.00366,0.09024
|
||||||
|
"ehiDGEN_5_Pers",2,489,0.09816,1.85725,0,0.1145,1.4826,-9,7,16,-0.18711,3.23028,0.08399
|
||||||
|
"ehiDGEN_5_Val",3,489,0.02454,2.16042,0,0.06361,1.4826,-9,10,19,0.1765,4.44483,0.0977
|
||||||
|
"ehiDGEN_10_Pref",4,489,0.20245,2.29618,0,0.18575,1.4826,-10,9,19,0.01558,3.90242,0.10384
|
||||||
|
"ehiDGEN_10_Pers",5,489,0.29448,2.34328,0,0.24936,1.4826,-9,10,19,0.06997,2.70639,0.10597
|
||||||
|
"ehiDGEN_10_Val",6,489,0.40491,2.32319,0,0.32061,1.4826,-8,10,18,0.37025,2.73426,0.10506
|
||||||
|
"ehi5_pref_MEAN",7,489,0.07607,0.64562,0,0.05954,0.29652,-4.4,4,8.4,0.13291,10.10075,0.0292
|
||||||
|
"ehi5_pers_MEAN",8,489,0.07454,0.74495,0,0.05267,0.59304,-2.4,3.6,6,0.58141,3.07505,0.03369
|
||||||
|
"ehi5_val_MEAN",9,489,0.05194,0.58562,0,0.02748,0.29652,-2.6,3,5.6,0.62482,5.46128,0.02648
|
||||||
|
"ehi10_pref_MEAN",10,489,0.10348,0.69093,0,0.09059,0.59304,-3.6,3.6,7.2,0.02685,4.20962,0.03124
|
||||||
|
"ehi10_pers_MEAN",11,489,0.12168,0.75889,0,0.09389,0.59304,-3.6,3.8,7.4,0.14345,3.87988,0.03432
|
||||||
|
"ehi10_val_MEAN",12,489,0.14397,0.71372,0,0.08193,0.29652,-2.4,4,6.4,1.25911,5.60017,0.03228
|
||||||
|
"ehi5.10_pref_MEAN",13,489,0.13538,0.70635,0,0.12214,0.29652,-5.2,2.6,7.8,-0.87659,8.98815,0.03194
|
||||||
|
"ehi5.10_pers_MEAN",14,489,0.18773,0.77513,0,0.16489,0.59304,-3.6,3.4,7,0.18903,2.50653,0.03505
|
||||||
|
"ehi5.10_val_MEAN",15,489,0.12229,0.73027,0,0.08193,0.29652,-2.8,3.6,6.4,0.68131,4.79817,0.03302
|
||||||
|
"ehiDGEN_5_mean",16,489,0.06203,1.45735,0,0.06361,0.9884,-7.66667,5.66667,13.33333,-0.33245,3.67417,0.0659
|
||||||
|
"ehiDGEN_10_mean",17,489,0.30061,1.89245,0,0.28329,0.9884,-8,9,17,0.01127,3.38489,0.08558
|
||||||
|
"ehi5_global_mean",18,489,0.06752,0.46201,0,0.0486,0.29652,-1.93333,3.33333,5.26667,0.93595,7.08224,0.02089
|
||||||
|
"ehi10_global_mean",19,489,0.12304,0.52522,0,0.08494,0.29652,-2.46667,2.33333,4.8,0.55734,3.94213,0.02375
|
||||||
|
"ehi5.10_global_mean",20,489,0.14847,0.50536,0.06667,0.12231,0.29652,-1.93333,2.8,4.73333,0.67593,4.05776,0.02285
|
||||||
|
"aot_total",21,489,0.20475,0.50227,0.125,0.18861,0.37065,-2,1.75,3.75,0.06455,1.92235,0.02271
|
||||||
|
"crt_correct",22,489,0.26858,0.35082,0,0.21204,0,0,1,1,0.97116,-0.45636,0.01586
|
||||||
|
"crt_int",23,489,0.67212,0.3564,0.66667,0.71416,0.4942,0,1,1,-0.65482,-0.92392,0.01612
|
||||||
|
15
eohi2/descriptive_statistics_domain_general_vars.csv
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
"","vars","n","mean","sd","median","trimmed","mad","min","max","range","skew","kurtosis","se"
|
||||||
|
"DGEN_past_5_mean",1,489,3.70279,2.64159,3.33333,3.57422,2.9652,0,10,10,0.33519,-0.92598,0.11946
|
||||||
|
"DGEN_past_10_mean",2,489,4.12747,2.69248,4,4.07209,3.4594,0,10,10,0.15303,-1.06997,0.12176
|
||||||
|
"DGEN_fut_5_mean",3,489,3.64076,2.72097,3,3.47498,2.9652,0,10,10,0.45879,-0.91272,0.12305
|
||||||
|
"DGEN_fut_10_mean",4,489,3.82686,2.67605,3.33333,3.6743,2.9652,0,10,10,0.41341,-0.85159,0.12101
|
||||||
|
"DGEN_past_5.10_mean",5,489,1.10907,1.14357,0.66667,0.93384,0.9884,0,6.66667,6.66667,1.38388,2.20547,0.05171
|
||||||
|
"DGEN_fut_5.10_mean",6,489,1.03272,1.16269,0.66667,0.83885,0.9884,0,7.33333,7.33333,1.82686,4.61137,0.05258
|
||||||
|
"DGENpast_global_mean",7,489,2.97978,1.86338,3.11111,2.96409,2.30627,0,6.66667,6.66667,0.05122,-1.1105,0.08427
|
||||||
|
"DGENfut_global_mean",8,489,2.83345,1.86526,2.66667,2.76788,2.30627,0,6.66667,6.66667,0.25867,-1.05797,0.08435
|
||||||
|
"DGEN_5_global_mean",9,489,3.67178,2.58067,3.16667,3.53859,2.9652,0,10,10,0.38068,-0.90478,0.1167
|
||||||
|
"DGEN_10_global_mean",10,489,3.97716,2.51197,3.83333,3.89992,2.9652,0,10,10,0.23861,-0.92036,0.1136
|
||||||
|
"DGEN_5.10_global_mean",11,489,1.07089,0.95646,0.83333,0.95335,0.9884,0,5,5,1.13335,1.28416,0.04325
|
||||||
|
"aot_total",12,489,0.20475,0.50227,0.125,0.18861,0.37065,-2,1.75,3.75,0.06455,1.92235,0.02271
|
||||||
|
"crt_correct",13,489,0.26858,0.35082,0,0.21204,0,0,1,1,0.97116,-0.45636,0.01586
|
||||||
|
"crt_int",14,489,0.67212,0.3564,0.66667,0.71416,0.4942,0,1,1,-0.65482,-0.92392,0.01612
|
||||||
|