Initial commit (f8eb3da04d): adds new file .history/eohi1/BS_means_20250922131352.vb (118 lines, normal file).
@ -0,0 +1,118 @
|
||||
Option Explicit
|
||||
|
||||
Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Return the 1-based column index whose row-1 header equals headerName,
    ' or 0 when no such header exists on the sheet.
    Dim headerCount As Long
    headerCount = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column

    Dim matchResult As Variant
    matchResult = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, headerCount)), 0)

    ' Application.Match returns an Error variant (not a raised error) on miss.
    If IsError(matchResult) Then
        GetColIndex = 0
    Else
        GetColIndex = CLng(matchResult)
    End If
End Function
|
||||
|
||||
Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
    ' Map a list of header names to their column indexes on row 1 of ws,
    ' keeping only headers that are actually present.
    ' Returns a 0-based Long array, or an empty Array() when none match.
    '
    ' Guard: a non-array or zero-length headers list yields an empty result.
    ' (The original unconditional ReDim 0 To UBound(headers) raised a
    ' subscript error for an empty array, where UBound is -1.)
    If Not IsArray(headers) Then
        BuildPresentColArray = Array()
        Exit Function
    End If
    If UBound(headers) < LBound(headers) Then
        BuildPresentColArray = Array()
        Exit Function
    End If

    Dim found() As Long
    ReDim found(0 To UBound(headers) - LBound(headers))

    Dim nFound As Long
    nFound = 0

    Dim i As Long
    For i = LBound(headers) To UBound(headers)
        Dim idx As Long
        idx = GetColIndex(CStr(headers(i)), ws)
        If idx > 0 Then
            found(nFound) = idx
            nFound = nFound + 1
        End If
    Next i

    If nFound = 0 Then
        BuildPresentColArray = Array()
    Else
        ' Shrink in place instead of copying element-by-element.
        ReDim Preserve found(0 To nFound - 1)
        BuildPresentColArray = found
    End If
End Function
|
||||
|
||||
Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
    ' Mean of the numeric cells in row rowIndex across the columns listed in
    ' colIndexes. Error cells and blank cells are skipped. Returns #N/A when
    ' no numeric value is found (or when colIndexes is not an array).
    Dim total As Double
    Dim used As Long
    total = 0
    used = 0

    If IsArray(colIndexes) Then
        Dim i As Long
        For i = LBound(colIndexes) To UBound(colIndexes)
            Dim cellVal As Variant
            cellVal = ws.Cells(rowIndex, CLng(colIndexes(i))).Value
            ' Bug fix: IsNumeric(Empty) is True in VBA, so blank cells were
            ' previously counted as zeros and silently dragged the mean down.
            If Not IsError(cellVal) And Not IsEmpty(cellVal) Then
                If IsNumeric(cellVal) Then
                    total = total + CDbl(cellVal)
                    used = used + 1
                End If
            End If
        Next i
    End If

    If used = 0 Then
        MeanOfRow = CVErr(xlErrNA)
    Else
        MeanOfRow = total / used
    End If
End Function
|
||||
|
||||
Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
    ' Locate the column headed headerName on row 1, creating it one past the
    ' current last header when absent. Returns the column index either way.
    Dim col As Long
    col = GetColIndex(headerName, ws)

    If col = 0 Then
        ' Header not found: append it after the last used header cell.
        col = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column + 1
        ws.Cells(1, col).Value = headerName
    End If

    EnsureOutputColumn = col
End Function
|
||||
|
||||
Sub BS_Means()
    ' Write per-row means of the 28 BS items - all items, the 14 easy items,
    ' and the 14 hard items - into the bs_28 / bs_easy / bs_hard columns of
    ' the first worksheet. Rows 2..lastRow (data rows under the header) are
    ' processed; last row is taken from column A.
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)

    Dim easy14 As Variant
    easy14 = Array( _
        "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
        "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _
    )

    Dim hard14 As Variant
    hard14 = Array( _
        "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
        "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
    )

    Dim all28 As Variant
    all28 = Array( _
        "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
        "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
        "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
        "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
    )

    ' Resolve which item columns actually exist before touching the sheet.
    Dim colsAll As Variant
    Dim colsEasy As Variant
    Dim colsHard As Variant
    colsAll = BuildPresentColArray(all28, ws)
    colsEasy = BuildPresentColArray(easy14, ws)
    colsHard = BuildPresentColArray(hard14, ws)

    ' Output columns are created (in this order) when they do not yet exist.
    Dim outAll As Long, outEasy As Long, outHard As Long
    outAll = EnsureOutputColumn(ws, "bs_28")
    outEasy = EnsureOutputColumn(ws, "bs_easy")
    outHard = EnsureOutputColumn(ws, "bs_hard")

    Dim lastRow As Long
    lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row

    Dim r As Long
    For r = 2 To lastRow
        ws.Cells(r, outAll).Value = MeanOfRow(ws, r, colsAll)
        ws.Cells(r, outEasy).Value = MeanOfRow(ws, r, colsEasy)
        ws.Cells(r, outHard).Value = MeanOfRow(ws, r, colsHard)
    Next r
End Sub
|
||||
|
||||
|
||||
118
.history/eohi1/BS_means_20250922131356.vb
Normal file
118
.history/eohi1/BS_means_20250922131356.vb
Normal file
@ -0,0 +1,118 @@
|
||||
Option Explicit

' BS_means module (history snapshot): per-row means over the 28 BS items
' (all / easy / hard subsets), written to bs_28, bs_easy and bs_hard on the
' first worksheet of this workbook.

Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Column index of the row-1 header equal to headerName; 0 when absent.
    Dim lastCol As Long
    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column

    Dim m As Variant
    m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)

    ' Application.Match returns an Error variant (not a raised error) on miss.
    If IsError(m) Then
        GetColIndex = 0
    Else
        GetColIndex = CLng(m)
    End If
End Function

Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
    ' 0-based Long array of the column indexes of those headers present on
    ' row 1 of ws; Array() when none are present.
    ' Guard: empty / non-array input yields Array() (the original unguarded
    ' ReDim raised a subscript error for an empty headers array).
    If Not IsArray(headers) Then
        BuildPresentColArray = Array()
        Exit Function
    End If
    If UBound(headers) < LBound(headers) Then
        BuildPresentColArray = Array()
        Exit Function
    End If

    Dim found() As Long
    ReDim found(0 To UBound(headers) - LBound(headers))

    Dim nFound As Long
    nFound = 0

    Dim i As Long
    For i = LBound(headers) To UBound(headers)
        Dim idx As Long
        idx = GetColIndex(CStr(headers(i)), ws)
        If idx > 0 Then
            found(nFound) = idx
            nFound = nFound + 1
        End If
    Next i

    If nFound = 0 Then
        BuildPresentColArray = Array()
    Else
        ReDim Preserve found(0 To nFound - 1)
        BuildPresentColArray = found
    End If
End Function

Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
    ' Mean of the numeric cells in row rowIndex across colIndexes, skipping
    ' error and blank cells; #N/A when no numeric value is found.
    Dim total As Double
    Dim used As Long
    total = 0
    used = 0

    If IsArray(colIndexes) Then
        Dim i As Long
        For i = LBound(colIndexes) To UBound(colIndexes)
            Dim cellVal As Variant
            cellVal = ws.Cells(rowIndex, CLng(colIndexes(i))).Value
            ' Bug fix: IsNumeric(Empty) is True in VBA, so blank cells were
            ' previously counted as zeros and skewed the mean.
            If Not IsError(cellVal) And Not IsEmpty(cellVal) Then
                If IsNumeric(cellVal) Then
                    total = total + CDbl(cellVal)
                    used = used + 1
                End If
            End If
        Next i
    End If

    If used = 0 Then
        MeanOfRow = CVErr(xlErrNA)
    Else
        MeanOfRow = total / used
    End If
End Function

Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
    ' Find the column headed headerName on row 1, creating it one past the
    ' current last header when absent. Returns the column index.
    Dim col As Long
    col = GetColIndex(headerName, ws)
    If col = 0 Then
        col = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column + 1
        ws.Cells(1, col).Value = headerName
    End If
    EnsureOutputColumn = col
End Function

Sub BS_Means()
    ' Write per-row means of the 28 BS items (all / easy-only / hard-only)
    ' into the bs_28, bs_easy and bs_hard columns of the first sheet.
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)

    Dim easy14 As Variant
    easy14 = Array( _
        "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
        "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _
    )

    Dim hard14 As Variant
    hard14 = Array( _
        "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
        "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
    )

    Dim all28 As Variant
    all28 = Array( _
        "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
        "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
        "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
        "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
    )

    ' Resolve which item columns actually exist before touching the sheet.
    Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
    colsAll = BuildPresentColArray(all28, ws)
    colsEasy = BuildPresentColArray(easy14, ws)
    colsHard = BuildPresentColArray(hard14, ws)

    ' Output columns are created (in this order) when they do not yet exist.
    Dim outAll As Long, outEasy As Long, outHard As Long
    outAll = EnsureOutputColumn(ws, "bs_28")
    outEasy = EnsureOutputColumn(ws, "bs_easy")
    outHard = EnsureOutputColumn(ws, "bs_hard")

    Dim lastRow As Long
    lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row

    Dim r As Long
    For r = 2 To lastRow
        ws.Cells(r, outAll).Value = MeanOfRow(ws, r, colsAll)
        ws.Cells(r, outEasy).Value = MeanOfRow(ws, r, colsEasy)
        ws.Cells(r, outHard).Value = MeanOfRow(ws, r, colsHard)
    Next r
End Sub
|
||||
|
||||
|
||||
118
.history/eohi1/BS_means_20250922131406.vb
Normal file
118
.history/eohi1/BS_means_20250922131406.vb
Normal file
@ -0,0 +1,118 @@
|
||||
Option Explicit

' BS_means module (history snapshot): per-row means over the 28 BS items
' (all / easy / hard subsets), written to bs_28, bs_easy and bs_hard on the
' first worksheet of this workbook.

Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Column index of the row-1 header equal to headerName; 0 when absent.
    Dim lastCol As Long
    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column

    Dim m As Variant
    m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)

    ' Application.Match returns an Error variant (not a raised error) on miss.
    If IsError(m) Then
        GetColIndex = 0
    Else
        GetColIndex = CLng(m)
    End If
End Function

Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
    ' 0-based Long array of the column indexes of those headers present on
    ' row 1 of ws; Array() when none are present.
    ' Guard: empty / non-array input yields Array() (the original unguarded
    ' ReDim raised a subscript error for an empty headers array).
    If Not IsArray(headers) Then
        BuildPresentColArray = Array()
        Exit Function
    End If
    If UBound(headers) < LBound(headers) Then
        BuildPresentColArray = Array()
        Exit Function
    End If

    Dim found() As Long
    ReDim found(0 To UBound(headers) - LBound(headers))

    Dim nFound As Long
    nFound = 0

    Dim i As Long
    For i = LBound(headers) To UBound(headers)
        Dim idx As Long
        idx = GetColIndex(CStr(headers(i)), ws)
        If idx > 0 Then
            found(nFound) = idx
            nFound = nFound + 1
        End If
    Next i

    If nFound = 0 Then
        BuildPresentColArray = Array()
    Else
        ReDim Preserve found(0 To nFound - 1)
        BuildPresentColArray = found
    End If
End Function

Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
    ' Mean of the numeric cells in row rowIndex across colIndexes, skipping
    ' error and blank cells; #N/A when no numeric value is found.
    Dim total As Double
    Dim used As Long
    total = 0
    used = 0

    If IsArray(colIndexes) Then
        Dim i As Long
        For i = LBound(colIndexes) To UBound(colIndexes)
            Dim cellVal As Variant
            cellVal = ws.Cells(rowIndex, CLng(colIndexes(i))).Value
            ' Bug fix: IsNumeric(Empty) is True in VBA, so blank cells were
            ' previously counted as zeros and skewed the mean.
            If Not IsError(cellVal) And Not IsEmpty(cellVal) Then
                If IsNumeric(cellVal) Then
                    total = total + CDbl(cellVal)
                    used = used + 1
                End If
            End If
        Next i
    End If

    If used = 0 Then
        MeanOfRow = CVErr(xlErrNA)
    Else
        MeanOfRow = total / used
    End If
End Function

Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
    ' Find the column headed headerName on row 1, creating it one past the
    ' current last header when absent. Returns the column index.
    Dim col As Long
    col = GetColIndex(headerName, ws)
    If col = 0 Then
        col = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column + 1
        ws.Cells(1, col).Value = headerName
    End If
    EnsureOutputColumn = col
End Function

Sub BS_Means()
    ' Write per-row means of the 28 BS items (all / easy-only / hard-only)
    ' into the bs_28, bs_easy and bs_hard columns of the first sheet.
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)

    Dim easy14 As Variant
    easy14 = Array( _
        "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
        "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _
    )

    Dim hard14 As Variant
    hard14 = Array( _
        "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
        "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
    )

    Dim all28 As Variant
    all28 = Array( _
        "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", _
        "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
        "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", _
        "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
    )

    ' Resolve which item columns actually exist before touching the sheet.
    Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
    colsAll = BuildPresentColArray(all28, ws)
    colsEasy = BuildPresentColArray(easy14, ws)
    colsHard = BuildPresentColArray(hard14, ws)

    ' Output columns are created (in this order) when they do not yet exist.
    Dim outAll As Long, outEasy As Long, outHard As Long
    outAll = EnsureOutputColumn(ws, "bs_28")
    outEasy = EnsureOutputColumn(ws, "bs_easy")
    outHard = EnsureOutputColumn(ws, "bs_hard")

    Dim lastRow As Long
    lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row

    Dim r As Long
    For r = 2 To lastRow
        ws.Cells(r, outAll).Value = MeanOfRow(ws, r, colsAll)
        ws.Cells(r, outEasy).Value = MeanOfRow(ws, r, colsEasy)
        ws.Cells(r, outHard).Value = MeanOfRow(ws, r, colsHard)
    Next r
End Sub
|
||||
|
||||
|
||||
@ -0,0 +1,25 @@
|
||||
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

# NPast_mean_total: row-wise average of the four past-domain means
# (na.rm = TRUE, so rows with partial data still get a total).
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)

# NFut_mean_total: row-wise average of the four future-domain means
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)

# Save the updated data (note: overwrites exp1.csv in place)
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals.
# summary() only auto-prints at an interactive prompt; wrap it in print()
# so the output also appears when this file is run via source()/Rscript.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first rows to verify calculations; head() is safe when the data
# has fewer than 5 rows (exp1_data[1:5, ] would emit NA-filled rows).
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
                         "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")], 5))
|
||||
@ -0,0 +1,25 @@
|
||||
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

# NPast_mean_total: row-wise average of the four past-domain means
# (na.rm = TRUE, so rows with partial data still get a total).
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)

# NFut_mean_total: row-wise average of the four future-domain means
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)

# Save the updated data (note: overwrites exp1.csv in place)
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals.
# summary() only auto-prints at an interactive prompt; wrap it in print()
# so the output also appears when this file is run via source()/Rscript.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first rows to verify calculations; head() is safe when the data
# has fewer than 5 rows (exp1_data[1:5, ] would emit NA-filled rows).
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
                         "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")], 5))
|
||||
@ -0,0 +1,25 @@
|
||||
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

# NPast_mean_total: row-wise average of the four past-domain means
# (na.rm = TRUE, so rows with partial data still get a total).
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)

# NFut_mean_total: row-wise average of the four future-domain means
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)

# Save the updated data (note: overwrites exp1.csv in place)
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals.
# summary() only auto-prints at an interactive prompt; wrap it in print()
# so the output also appears when this file is run via source()/Rscript.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first rows to verify calculations; head() is safe when the data
# has fewer than 5 rows (exp1_data[1:5, ] would emit NA-filled rows).
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
                         "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")], 5))
|
||||
@ -0,0 +1,25 @@
|
||||
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

# NPast_mean_total: row-wise average of the four past-domain means
# (na.rm = TRUE, so rows with partial data still get a total).
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")], na.rm = TRUE)

# NFut_mean_total: row-wise average of the four future-domain means
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")], na.rm = TRUE)

# Save the updated data (note: overwrites exp1.csv in place)
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals.
# summary() only auto-prints at an interactive prompt; wrap it in print()
# so the output also appears when this file is run via source()/Rscript.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first rows to verify calculations; head() is safe when the data
# has fewer than 5 rows (exp1_data[1:5, ] would emit NA-filled rows).
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life", "NPast_mean_total",
                         "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life", "NFut_mean_total")], 5))
|
||||
@ -0,0 +1,82 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
# Reshape the eight domain-mean columns into long format: one row per
# participant x TIME x DOMAIN, with the score stored in MEAN_DIFFERENCE.
keep_cols <- c("pID", "ResponseId", "TEMPORAL_DO", required_vars)

long_data <- pivot_longer(
  select(data, all_of(keep_cols)),
  cols = all_of(required_vars),
  names_to = "variable",
  values_to = "MEAN_DIFFERENCE"
)

# Attach the time/domain labels for each source variable.
long_data <- left_join(long_data, domain_mapping, by = "variable")

# Fix factor levels (columns 'time' and 'domain' come from domain_mapping).
long_data <- mutate(
  long_data,
  TIME = factor(time, levels = c("Past", "Future")),
  DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
  pID = as.factor(pID),
  TEMPORAL_DO = as.factor(TEMPORAL_DO)
)

# Keep only the analysis columns and drop rows with missing scores.
long_data <- long_data %>%
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))

# Alias retained for downstream chunks that refer to long_data_clean.
long_data_clean <- long_data
```
|
||||
|
||||
@ -0,0 +1,121 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
# Cell-level descriptives (n, centre, spread, quartiles, range) for every
# TIME x DOMAIN combination, rounded to 5 decimal places throughout.
r5 <- function(x) round(x, 5)

desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = r5(mean(MEAN_DIFFERENCE, na.rm = TRUE)),
    variance = r5(var(MEAN_DIFFERENCE, na.rm = TRUE)),
    sd = r5(sd(MEAN_DIFFERENCE, na.rm = TRUE)),
    median = r5(median(MEAN_DIFFERENCE, na.rm = TRUE)),
    q1 = r5(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE)),
    q3 = r5(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE)),
    min = r5(min(MEAN_DIFFERENCE, na.rm = TRUE)),
    max = r5(max(MEAN_DIFFERENCE, na.rm = TRUE)),
    .groups = 'drop'
  )

print(desc_stats)
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
@ -0,0 +1,175 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
# Anderson-Darling normality test within each TIME x DOMAIN cell.
# Fix: run ad.test() once per group (the original called it twice, once per
# extracted field) and unpack the stored htest object; unname() strips the
# names attribute from the statistic so the column is a plain numeric.
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad, ~ .x$p.value)
  ) %>%
  select(-ad)

print(normality_results)
```
|
||||
|
||||
@ -0,0 +1,205 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
normality_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
|
||||
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
homogeneity_time <- long_data_clean %>%
|
||||
group_by(DOMAIN) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
homogeneity_domain <- long_data_clean %>%
|
||||
group_by(TIME) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_domain)
|
||||
```
|
||||
|
||||
@ -0,0 +1,310 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
normality_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
|
||||
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
homogeneity_time <- long_data_clean %>%
|
||||
group_by(DOMAIN) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
homogeneity_domain <- long_data_clean %>%
|
||||
group_by(TIME) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_domain)
|
||||
```
|
||||
|
||||
## Hartley's F-Max Test with Bootstrap Critical Values
|
||||
|
||||
```{r hartley-function}
|
||||
# Function to calculate Hartley's F-max ratio
|
||||
calculate_hartley_ratio <- function(variances) {
|
||||
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
|
||||
}
|
||||
|
||||
# More efficient bootstrap function for Hartley's F-max test
|
||||
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
|
||||
# Get unique groups and their sample sizes
|
||||
groups <- unique(data[[group_var]])
|
||||
|
||||
# Calculate observed variances for each group
|
||||
observed_vars <- data %>%
|
||||
dplyr::group_by(!!rlang::sym(group_var)) %>%
|
||||
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
|
||||
dplyr::pull(var)
|
||||
|
||||
# Handle invalid variances
|
||||
if(any(observed_vars <= 0 | is.na(observed_vars))) {
|
||||
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
|
||||
}
|
||||
|
||||
# Calculate observed F-max ratio
|
||||
observed_ratio <- max(observed_vars) / min(observed_vars)
|
||||
|
||||
# Pre-allocate storage for bootstrap ratios
|
||||
bootstrap_ratios <- numeric(n_iter)
|
||||
|
||||
# Get group data once
|
||||
group_data_list <- map(groups, ~ {
|
||||
group_data <- data[data[[group_var]] == .x, response_var]
|
||||
group_data[!is.na(group_data)]
|
||||
})
|
||||
|
||||
# Bootstrap with pre-allocated storage
|
||||
for(i in 1:n_iter) {
|
||||
# Bootstrap sample from each group independently
|
||||
sample_vars <- map_dbl(group_data_list, ~ {
|
||||
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
|
||||
var(bootstrap_sample, na.rm = TRUE)
|
||||
})
|
||||
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
|
||||
}
|
||||
|
||||
# Remove invalid ratios
|
||||
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
|
||||
|
||||
if(length(valid_ratios) == 0) {
|
||||
stop("No valid bootstrap ratios generated")
|
||||
}
|
||||
|
||||
# Calculate critical value (95th percentile)
|
||||
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
|
||||
|
||||
# Return only essential information
|
||||
return(list(
|
||||
observed_ratio = observed_ratio,
|
||||
critical_95 = critical_95,
|
||||
n_valid_iterations = length(valid_ratios)
|
||||
))
|
||||
}
|
||||
```
|
||||
|
||||
```{r hartley-results}
|
||||
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||
|
||||
print(unique(long_data_clean$TEMPORAL_DO))
|
||||
print(table(long_data_clean$TEMPORAL_DO))
|
||||
|
||||
observed_temporal_ratios <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||
# Calculate F-max ratio
|
||||
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||
|
||||
print(observed_temporal_ratios)
|
||||
|
||||
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||
set.seed(123) # For reproducibility
|
||||
|
||||
hartley_temporal_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
mutate(
|
||||
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||
significant = observed_ratio > critical_95
|
||||
) %>%
|
||||
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||
|
||||
print(hartley_temporal_results)
|
||||
```
|
||||
|
||||
@ -0,0 +1,348 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
normality_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
|
||||
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
homogeneity_time <- long_data_clean %>%
|
||||
group_by(DOMAIN) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
homogeneity_domain <- long_data_clean %>%
|
||||
group_by(TIME) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_domain)
|
||||
```
|
||||
|
||||
## Hartley's F-Max Test with Bootstrap Critical Values
|
||||
|
||||
```{r hartley-function}
|
||||
# Function to calculate Hartley's F-max ratio
|
||||
calculate_hartley_ratio <- function(variances) {
|
||||
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
|
||||
}
|
||||
|
||||
# More efficient bootstrap function for Hartley's F-max test
|
||||
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
|
||||
# Get unique groups and their sample sizes
|
||||
groups <- unique(data[[group_var]])
|
||||
|
||||
# Calculate observed variances for each group
|
||||
observed_vars <- data %>%
|
||||
dplyr::group_by(!!rlang::sym(group_var)) %>%
|
||||
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
|
||||
dplyr::pull(var)
|
||||
|
||||
# Handle invalid variances
|
||||
if(any(observed_vars <= 0 | is.na(observed_vars))) {
|
||||
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
|
||||
}
|
||||
|
||||
# Calculate observed F-max ratio
|
||||
observed_ratio <- max(observed_vars) / min(observed_vars)
|
||||
|
||||
# Pre-allocate storage for bootstrap ratios
|
||||
bootstrap_ratios <- numeric(n_iter)
|
||||
|
||||
# Get group data once
|
||||
group_data_list <- map(groups, ~ {
|
||||
group_data <- data[data[[group_var]] == .x, response_var]
|
||||
group_data[!is.na(group_data)]
|
||||
})
|
||||
|
||||
# Bootstrap with pre-allocated storage
|
||||
for(i in 1:n_iter) {
|
||||
# Bootstrap sample from each group independently
|
||||
sample_vars <- map_dbl(group_data_list, ~ {
|
||||
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
|
||||
var(bootstrap_sample, na.rm = TRUE)
|
||||
})
|
||||
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
|
||||
}
|
||||
|
||||
# Remove invalid ratios
|
||||
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
|
||||
|
||||
if(length(valid_ratios) == 0) {
|
||||
stop("No valid bootstrap ratios generated")
|
||||
}
|
||||
|
||||
# Calculate critical value (95th percentile)
|
||||
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
|
||||
|
||||
# Return only essential information
|
||||
return(list(
|
||||
observed_ratio = observed_ratio,
|
||||
critical_95 = critical_95,
|
||||
n_valid_iterations = length(valid_ratios)
|
||||
))
|
||||
}
|
||||
```
|
||||
|
||||
```{r hartley-results}
|
||||
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||
|
||||
print(unique(long_data_clean$TEMPORAL_DO))
|
||||
print(table(long_data_clean$TEMPORAL_DO))
|
||||
|
||||
observed_temporal_ratios <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||
# Calculate F-max ratio
|
||||
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||
|
||||
print(observed_temporal_ratios)
|
||||
|
||||
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||
set.seed(123) # For reproducibility
|
||||
|
||||
hartley_temporal_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
mutate(
|
||||
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||
significant = observed_ratio > critical_95
|
||||
) %>%
|
||||
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||
|
||||
print(hartley_temporal_results)
|
||||
```
|
||||
|
||||
# Mixed ANOVA Analysis
|
||||
|
||||
## Design Balance Check
|
||||
|
||||
```{r design-check}
|
||||
# Check for complete cases
|
||||
complete_cases <- sum(complete.cases(long_data_clean))
|
||||
print(complete_cases)
|
||||
|
||||
# Check if design is balanced
|
||||
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
|
||||
if(all(design_balance %in% c(0, 1))) {
|
||||
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
|
||||
} else {
|
||||
print("Warning: Design is unbalanced")
|
||||
print(summary(as.vector(design_balance)))
|
||||
}
|
||||
```
|
||||
|
||||
## Mixed ANOVA with Sphericity Corrections
|
||||
|
||||
```{r mixed-anova}
|
||||
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
|
||||
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
|
||||
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
|
||||
mixed_anova_model <- ezANOVA(data = long_data_clean,
|
||||
dv = MEAN_DIFFERENCE,
|
||||
wid = pID,
|
||||
between = TEMPORAL_DO,
|
||||
within = .(TIME, DOMAIN),
|
||||
type = 3,
|
||||
detailed = TRUE)
|
||||
|
||||
anova_output <- mixed_anova_model$ANOVA
|
||||
rownames(anova_output) <- NULL # Reset row numbers to be sequential
|
||||
print(anova_output)
|
||||
```
|
||||
|
||||
@ -0,0 +1,430 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
normality_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
|
||||
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
homogeneity_time <- long_data_clean %>%
|
||||
group_by(DOMAIN) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
homogeneity_domain <- long_data_clean %>%
|
||||
group_by(TIME) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_domain)
|
||||
```
|
||||
|
||||
## Hartley's F-Max Test with Bootstrap Critical Values
|
||||
|
||||
```{r hartley-function}
|
||||
# Function to calculate Hartley's F-max ratio
|
||||
calculate_hartley_ratio <- function(variances) {
|
||||
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
|
||||
}
|
||||
|
||||
# More efficient bootstrap function for Hartley's F-max test
|
||||
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
|
||||
# Get unique groups and their sample sizes
|
||||
groups <- unique(data[[group_var]])
|
||||
|
||||
# Calculate observed variances for each group
|
||||
observed_vars <- data %>%
|
||||
dplyr::group_by(!!rlang::sym(group_var)) %>%
|
||||
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
|
||||
dplyr::pull(var)
|
||||
|
||||
# Handle invalid variances
|
||||
if(any(observed_vars <= 0 | is.na(observed_vars))) {
|
||||
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
|
||||
}
|
||||
|
||||
# Calculate observed F-max ratio
|
||||
observed_ratio <- max(observed_vars) / min(observed_vars)
|
||||
|
||||
# Pre-allocate storage for bootstrap ratios
|
||||
bootstrap_ratios <- numeric(n_iter)
|
||||
|
||||
# Get group data once
|
||||
group_data_list <- map(groups, ~ {
|
||||
group_data <- data[data[[group_var]] == .x, response_var]
|
||||
group_data[!is.na(group_data)]
|
||||
})
|
||||
|
||||
# Bootstrap with pre-allocated storage
|
||||
for(i in 1:n_iter) {
|
||||
# Bootstrap sample from each group independently
|
||||
sample_vars <- map_dbl(group_data_list, ~ {
|
||||
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
|
||||
var(bootstrap_sample, na.rm = TRUE)
|
||||
})
|
||||
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
|
||||
}
|
||||
|
||||
# Remove invalid ratios
|
||||
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
|
||||
|
||||
if(length(valid_ratios) == 0) {
|
||||
stop("No valid bootstrap ratios generated")
|
||||
}
|
||||
|
||||
# Calculate critical value (95th percentile)
|
||||
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
|
||||
|
||||
# Return only essential information
|
||||
return(list(
|
||||
observed_ratio = observed_ratio,
|
||||
critical_95 = critical_95,
|
||||
n_valid_iterations = length(valid_ratios)
|
||||
))
|
||||
}
|
||||
```
|
||||
|
||||
```{r hartley-results}
|
||||
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||
|
||||
print(unique(long_data_clean$TEMPORAL_DO))
|
||||
print(table(long_data_clean$TEMPORAL_DO))
|
||||
|
||||
observed_temporal_ratios <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||
# Calculate F-max ratio
|
||||
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||
|
||||
print(observed_temporal_ratios)
|
||||
|
||||
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||
set.seed(123) # For reproducibility
|
||||
|
||||
hartley_temporal_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
mutate(
|
||||
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||
significant = observed_ratio > critical_95
|
||||
) %>%
|
||||
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||
|
||||
print(hartley_temporal_results)
|
||||
```
|
||||
|
||||
# Mixed ANOVA Analysis
|
||||
|
||||
## Design Balance Check
|
||||
|
||||
```{r design-check}
|
||||
# Check for complete cases
|
||||
complete_cases <- sum(complete.cases(long_data_clean))
|
||||
print(complete_cases)
|
||||
|
||||
# Check if design is balanced
|
||||
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
|
||||
if(all(design_balance %in% c(0, 1))) {
|
||||
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
|
||||
} else {
|
||||
print("Warning: Design is unbalanced")
|
||||
print(summary(as.vector(design_balance)))
|
||||
}
|
||||
```
|
||||
|
||||
## Mixed ANOVA with Sphericity Corrections
|
||||
|
||||
```{r mixed-anova}
|
||||
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
|
||||
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
|
||||
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
|
||||
mixed_anova_model <- ezANOVA(data = long_data_clean,
|
||||
dv = MEAN_DIFFERENCE,
|
||||
wid = pID,
|
||||
between = TEMPORAL_DO,
|
||||
within = .(TIME, DOMAIN),
|
||||
type = 3,
|
||||
detailed = TRUE)
|
||||
|
||||
anova_output <- mixed_anova_model$ANOVA
|
||||
rownames(anova_output) <- NULL # Reset row numbers to be sequential
|
||||
print(anova_output)
|
||||
```
|
||||
|
||||
## Mauchly's Test for Sphericity
|
||||
|
||||
```{r mauchly-test}
|
||||
print(mixed_anova_model$Mauchly)
|
||||
```
|
||||
|
||||
## Sphericity-Corrected Results
|
||||
|
||||
```{r sphericity-corrections}
|
||||
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
|
||||
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
|
||||
print(mixed_anova_model$`Sphericity Corrections`)
|
||||
|
||||
# Extract and display corrected degrees of freedom
|
||||
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
|
||||
anova_table <- mixed_anova_model$ANOVA
|
||||
|
||||
corrected_df <- data.frame(
|
||||
Effect = sphericity_corr$Effect,
|
||||
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
GG_epsilon = sphericity_corr$GGe,
|
||||
HF_epsilon = sphericity_corr$HFe
|
||||
)
|
||||
|
||||
print(corrected_df)
|
||||
|
||||
# Between-subjects effects (no sphericity corrections needed)
|
||||
between_effects <- c("TEMPORAL_DO")
|
||||
for(effect in between_effects) {
|
||||
if(effect %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == effect]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == effect]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == effect]
|
||||
p_value <- anova_table$p[anova_table$Effect == effect]
|
||||
|
||||
cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
|
||||
}
|
||||
}
|
||||
|
||||
# Within-subjects effects (sphericity corrections where applicable)
|
||||
|
||||
# TIME main effect (2 levels, sphericity automatically satisfied)
|
||||
if("TIME" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "TIME"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "TIME"]
|
||||
cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# DOMAIN main effect (4 levels, needs sphericity correction)
|
||||
if("DOMAIN" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
|
||||
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# Interactions with sphericity corrections
|
||||
for(i in seq_len(nrow(corrected_df))) {
|
||||
effect <- corrected_df$Effect[i]
|
||||
f_value <- anova_table$F[match(effect, anova_table$Effect)]
|
||||
|
||||
cat(sprintf("\n%s:\n", effect))
|
||||
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
|
||||
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
|
||||
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
|
||||
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
|
||||
}
|
||||
} else {
|
||||
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
|
||||
}
|
||||
```
|
||||
|
||||
@ -0,0 +1,497 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
normality_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
|
||||
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
homogeneity_time <- long_data_clean %>%
|
||||
group_by(DOMAIN) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
homogeneity_domain <- long_data_clean %>%
|
||||
group_by(TIME) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_domain)
|
||||
```
|
||||
|
||||
## Hartley's F-Max Test with Bootstrap Critical Values
|
||||
|
||||
```{r hartley-function}
|
||||
# Function to calculate Hartley's F-max ratio
|
||||
calculate_hartley_ratio <- function(variances) {
|
||||
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
|
||||
}
|
||||
|
||||
# More efficient bootstrap function for Hartley's F-max test
|
||||
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
|
||||
# Get unique groups and their sample sizes
|
||||
groups <- unique(data[[group_var]])
|
||||
|
||||
# Calculate observed variances for each group
|
||||
observed_vars <- data %>%
|
||||
dplyr::group_by(!!rlang::sym(group_var)) %>%
|
||||
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
|
||||
dplyr::pull(var)
|
||||
|
||||
# Handle invalid variances
|
||||
if(any(observed_vars <= 0 | is.na(observed_vars))) {
|
||||
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
|
||||
}
|
||||
|
||||
# Calculate observed F-max ratio
|
||||
observed_ratio <- max(observed_vars) / min(observed_vars)
|
||||
|
||||
# Pre-allocate storage for bootstrap ratios
|
||||
bootstrap_ratios <- numeric(n_iter)
|
||||
|
||||
# Get group data once
|
||||
group_data_list <- map(groups, ~ {
|
||||
group_data <- data[data[[group_var]] == .x, response_var]
|
||||
group_data[!is.na(group_data)]
|
||||
})
|
||||
|
||||
# Bootstrap with pre-allocated storage
|
||||
for(i in 1:n_iter) {
|
||||
# Bootstrap sample from each group independently
|
||||
sample_vars <- map_dbl(group_data_list, ~ {
|
||||
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
|
||||
var(bootstrap_sample, na.rm = TRUE)
|
||||
})
|
||||
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
|
||||
}
|
||||
|
||||
# Remove invalid ratios
|
||||
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
|
||||
|
||||
if(length(valid_ratios) == 0) {
|
||||
stop("No valid bootstrap ratios generated")
|
||||
}
|
||||
|
||||
# Calculate critical value (95th percentile)
|
||||
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
|
||||
|
||||
# Return only essential information
|
||||
return(list(
|
||||
observed_ratio = observed_ratio,
|
||||
critical_95 = critical_95,
|
||||
n_valid_iterations = length(valid_ratios)
|
||||
))
|
||||
}
|
||||
```
|
||||
|
||||
```{r hartley-results}
|
||||
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||
|
||||
print(unique(long_data_clean$TEMPORAL_DO))
|
||||
print(table(long_data_clean$TEMPORAL_DO))
|
||||
|
||||
observed_temporal_ratios <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||
# Calculate F-max ratio
|
||||
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||
|
||||
print(observed_temporal_ratios)
|
||||
|
||||
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||
set.seed(123) # For reproducibility
|
||||
|
||||
hartley_temporal_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
mutate(
|
||||
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||
significant = observed_ratio > critical_95
|
||||
) %>%
|
||||
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||
|
||||
print(hartley_temporal_results)
|
||||
```
|
||||
|
||||
# Mixed ANOVA Analysis
|
||||
|
||||
## Design Balance Check
|
||||
|
||||
```{r design-check}
|
||||
# Check for complete cases
|
||||
complete_cases <- sum(complete.cases(long_data_clean))
|
||||
print(complete_cases)
|
||||
|
||||
# Check if design is balanced
|
||||
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
|
||||
if(all(design_balance %in% c(0, 1))) {
|
||||
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
|
||||
} else {
|
||||
print("Warning: Design is unbalanced")
|
||||
print(summary(as.vector(design_balance)))
|
||||
}
|
||||
```
|
||||
|
||||
## Mixed ANOVA with Sphericity Corrections
|
||||
|
||||
```{r mixed-anova}
|
||||
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
|
||||
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
|
||||
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
|
||||
mixed_anova_model <- ezANOVA(data = long_data_clean,
|
||||
dv = MEAN_DIFFERENCE,
|
||||
wid = pID,
|
||||
between = TEMPORAL_DO,
|
||||
within = .(TIME, DOMAIN),
|
||||
type = 3,
|
||||
detailed = TRUE)
|
||||
|
||||
anova_output <- mixed_anova_model$ANOVA
|
||||
rownames(anova_output) <- NULL # Reset row numbers to be sequential
|
||||
print(anova_output)
|
||||
```
|
||||
|
||||
## Mauchly's Test for Sphericity
|
||||
|
||||
```{r mauchly-test}
|
||||
print(mixed_anova_model$Mauchly)
|
||||
```
|
||||
|
||||
## Sphericity-Corrected Results
|
||||
|
||||
```{r sphericity-corrections}
|
||||
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
|
||||
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
|
||||
print(mixed_anova_model$`Sphericity Corrections`)
|
||||
|
||||
# Extract and display corrected degrees of freedom
|
||||
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
|
||||
anova_table <- mixed_anova_model$ANOVA
|
||||
|
||||
corrected_df <- data.frame(
|
||||
Effect = sphericity_corr$Effect,
|
||||
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
GG_epsilon = sphericity_corr$GGe,
|
||||
HF_epsilon = sphericity_corr$HFe
|
||||
)
|
||||
|
||||
print(corrected_df)
|
||||
|
||||
# Between-subjects effects (no sphericity corrections needed)
|
||||
between_effects <- c("TEMPORAL_DO")
|
||||
for(effect in between_effects) {
|
||||
if(effect %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == effect]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == effect]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == effect]
|
||||
p_value <- anova_table$p[anova_table$Effect == effect]
|
||||
|
||||
cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
|
||||
}
|
||||
}
|
||||
|
||||
# Within-subjects effects (sphericity corrections where applicable)
|
||||
|
||||
# TIME main effect (2 levels, sphericity automatically satisfied)
|
||||
if("TIME" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "TIME"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "TIME"]
|
||||
cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# DOMAIN main effect (4 levels, needs sphericity correction)
|
||||
if("DOMAIN" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
|
||||
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# Interactions with sphericity corrections
|
||||
for(i in seq_len(nrow(corrected_df))) {
|
||||
effect <- corrected_df$Effect[i]
|
||||
f_value <- anova_table$F[match(effect, anova_table$Effect)]
|
||||
|
||||
cat(sprintf("\n%s:\n", effect))
|
||||
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
|
||||
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
|
||||
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
|
||||
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
|
||||
}
|
||||
} else {
|
||||
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
|
||||
}
|
||||
```
|
||||
|
||||
# Effect Sizes (Cohen's d)
|
||||
|
||||
## Main Effects
|
||||
|
||||
```{r cohens-d-main}
|
||||
# Create aov model for emmeans
|
||||
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
|
||||
data = long_data_clean)
|
||||
|
||||
# Main Effect of TIME
|
||||
time_emmeans <- emmeans(aov_model, ~ TIME)
|
||||
print(time_emmeans)
|
||||
|
||||
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
|
||||
time_main_df <- as.data.frame(time_main_contrast)
|
||||
print(time_main_df)
|
||||
|
||||
# Calculate Cohen's d for TIME main effect
|
||||
if(nrow(time_main_df) > 0) {
|
||||
cat("\nCohen's d for TIME main effect:\n")
|
||||
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
|
||||
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
|
||||
|
||||
time_cohens_d <- cohen.d(time_past_data, time_future_data)
|
||||
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
|
||||
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
|
||||
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
|
||||
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
|
||||
}
|
||||
```
|
||||
|
||||
```{r cohens-d-domain}
|
||||
# Main Effect of DOMAIN (significant: p < 0.001)
|
||||
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
|
||||
print(domain_emmeans)
|
||||
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
|
||||
domain_main_df <- as.data.frame(domain_main_contrast)
|
||||
print(domain_main_df)
|
||||
|
||||
# Calculate Cohen's d for significant DOMAIN contrasts
|
||||
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
|
||||
if(nrow(significant_domain) > 0) {
|
||||
cat("\nCohen's d for significant DOMAIN contrasts:\n")
|
||||
for(i in seq_len(nrow(significant_domain))) {
|
||||
contrast_name <- as.character(significant_domain$contrast[i])
|
||||
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
|
||||
if(length(contrast_parts) == 2) {
|
||||
level1 <- trimws(contrast_parts[1])
|
||||
level2 <- trimws(contrast_parts[2])
|
||||
|
||||
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
|
||||
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
|
||||
|
||||
if(length(data1) > 0 && length(data2) > 0) {
|
||||
domain_cohens_d <- cohen.d(data1, data2)
|
||||
cat(sprintf("Comparison: %s\n", contrast_name))
|
||||
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
|
||||
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
|
||||
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
|
||||
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
|
||||
cat("\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
normality_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
|
||||
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
homogeneity_time <- long_data_clean %>%
|
||||
group_by(DOMAIN) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
homogeneity_domain <- long_data_clean %>%
|
||||
group_by(TIME) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_domain)
|
||||
```
|
||||
|
||||
## Hartley's F-Max Test with Bootstrap Critical Values
|
||||
|
||||
```{r hartley-function}
|
||||
# Hartley's F-max ratio: largest group variance divided by the smallest,
# ignoring NA variances.
calculate_hartley_ratio <- function(variances) {
  largest <- max(variances, na.rm = TRUE)
  smallest <- min(variances, na.rm = TRUE)
  largest / smallest
}
|
||||
|
||||
# Bootstrap critical values for Hartley's F-max test of variance homogeneity.
#
# Args:
#   data:         data frame / tibble containing the grouping and response columns
#                 (typically supplied via dplyr::pick()).
#   group_var:    name of the grouping column (string).
#   response_var: name of the numeric response column (string).
#   n_iter:       number of bootstrap iterations (default 1000).
#
# Returns: list(observed_ratio, critical_95, n_valid_iterations).
# Stops with an error if no finite bootstrap ratios were produced.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group labels
  groups <- unique(data[[group_var]])

  # Observed per-group variances
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Guard against zero/NA variances so the observed ratio stays finite
  invalid <- observed_vars <= 0 | is.na(observed_vars)
  if (any(invalid)) {
    observed_vars[invalid] <- 1e-10
  }

  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Extract each group's response values once, as a plain numeric vector.
  # BUGFIX: the previous `data[rows, response_var]` subsetting returns a
  # one-column tibble (not a vector) when `data` is a tibble from pick(),
  # which breaks sample()/var() below; `[[` extraction is always a vector.
  group_data_list <- map(groups, ~ {
    vals <- data[[response_var]][data[[group_var]] == .x]
    vals[!is.na(vals)]
  })

  # Bootstrap: resample each group independently and record the F-max ratio
  for (i in seq_len(n_iter)) {
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Keep only finite ratios (degenerate resamples can yield NA/Inf)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||
```
|
||||
|
||||
```{r hartley-results}
|
||||
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||
|
||||
print(unique(long_data_clean$TEMPORAL_DO))
|
||||
print(table(long_data_clean$TEMPORAL_DO))
|
||||
|
||||
observed_temporal_ratios <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||
# Calculate F-max ratio
|
||||
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||
|
||||
print(observed_temporal_ratios)
|
||||
|
||||
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||
set.seed(123) # For reproducibility
|
||||
|
||||
hartley_temporal_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
mutate(
|
||||
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||
significant = observed_ratio > critical_95
|
||||
) %>%
|
||||
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||
|
||||
print(hartley_temporal_results)
|
||||
```
|
||||
|
||||
# Mixed ANOVA Analysis
|
||||
|
||||
## Design Balance Check
|
||||
|
||||
```{r design-check}
|
||||
# Check for complete cases
|
||||
complete_cases <- sum(complete.cases(long_data_clean))
|
||||
print(complete_cases)
|
||||
|
||||
# Check if design is balanced
|
||||
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
|
||||
if(all(design_balance %in% c(0, 1))) {
|
||||
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
|
||||
} else {
|
||||
print("Warning: Design is unbalanced")
|
||||
print(summary(as.vector(design_balance)))
|
||||
}
|
||||
```
|
||||
|
||||
## Mixed ANOVA with Sphericity Corrections
|
||||
|
||||
```{r mixed-anova}
|
||||
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
|
||||
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
|
||||
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
|
||||
mixed_anova_model <- ezANOVA(data = long_data_clean,
|
||||
dv = MEAN_DIFFERENCE,
|
||||
wid = pID,
|
||||
between = TEMPORAL_DO,
|
||||
within = .(TIME, DOMAIN),
|
||||
type = 3,
|
||||
detailed = TRUE)
|
||||
|
||||
anova_output <- mixed_anova_model$ANOVA
|
||||
rownames(anova_output) <- NULL # Reset row numbers to be sequential
|
||||
print(anova_output)
|
||||
```
|
||||
|
||||
## Mauchly's Test for Sphericity
|
||||
|
||||
```{r mauchly-test}
|
||||
print(mixed_anova_model$Mauchly)
|
||||
```
|
||||
|
||||
## Sphericity-Corrected Results
|
||||
|
||||
```{r sphericity-corrections}
|
||||
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
|
||||
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
|
||||
print(mixed_anova_model$`Sphericity Corrections`)
|
||||
|
||||
# Extract and display corrected degrees of freedom
|
||||
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
|
||||
anova_table <- mixed_anova_model$ANOVA
|
||||
|
||||
corrected_df <- data.frame(
|
||||
Effect = sphericity_corr$Effect,
|
||||
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
GG_epsilon = sphericity_corr$GGe,
|
||||
HF_epsilon = sphericity_corr$HFe
|
||||
)
|
||||
|
||||
print(corrected_df)
|
||||
|
||||
# Between-subjects effects (no sphericity corrections needed)
|
||||
between_effects <- c("TEMPORAL_DO")
|
||||
for(effect in between_effects) {
|
||||
if(effect %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == effect]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == effect]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == effect]
|
||||
p_value <- anova_table$p[anova_table$Effect == effect]
|
||||
|
||||
cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
|
||||
}
|
||||
}
|
||||
|
||||
# Within-subjects effects (sphericity corrections where applicable)
|
||||
|
||||
# TIME main effect (2 levels, sphericity automatically satisfied)
|
||||
if("TIME" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "TIME"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "TIME"]
|
||||
cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# DOMAIN main effect (4 levels, needs sphericity correction)
|
||||
if("DOMAIN" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
|
||||
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# Interactions with sphericity corrections
|
||||
for(i in seq_len(nrow(corrected_df))) {
|
||||
effect <- corrected_df$Effect[i]
|
||||
f_value <- anova_table$F[match(effect, anova_table$Effect)]
|
||||
|
||||
cat(sprintf("\n%s:\n", effect))
|
||||
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
|
||||
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
|
||||
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
|
||||
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
|
||||
}
|
||||
} else {
|
||||
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
|
||||
}
|
||||
```
|
||||
|
||||
# Effect Sizes (Cohen's d)
|
||||
|
||||
## Main Effects
|
||||
|
||||
```{r cohens-d-main}
|
||||
# Create aov model for emmeans
|
||||
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
|
||||
data = long_data_clean)
|
||||
|
||||
# Main Effect of TIME
|
||||
time_emmeans <- emmeans(aov_model, ~ TIME)
|
||||
print(time_emmeans)
|
||||
|
||||
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
|
||||
time_main_df <- as.data.frame(time_main_contrast)
|
||||
print(time_main_df)
|
||||
|
||||
# Calculate Cohen's d for TIME main effect
|
||||
if(nrow(time_main_df) > 0) {
|
||||
cat("\nCohen's d for TIME main effect:\n")
|
||||
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
|
||||
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
|
||||
|
||||
time_cohens_d <- cohen.d(time_past_data, time_future_data)
|
||||
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
|
||||
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
|
||||
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
|
||||
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
|
||||
}
|
||||
```
|
||||
|
||||
```{r cohens-d-domain}
|
||||
# Main Effect of DOMAIN (significant: p < 0.001)
|
||||
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
|
||||
print(domain_emmeans)
|
||||
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
|
||||
domain_main_df <- as.data.frame(domain_main_contrast)
|
||||
print(domain_main_df)
|
||||
|
||||
# Calculate Cohen's d for significant DOMAIN contrasts
|
||||
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
|
||||
if(nrow(significant_domain) > 0) {
|
||||
cat("\nCohen's d for significant DOMAIN contrasts:\n")
|
||||
for(i in seq_len(nrow(significant_domain))) {
|
||||
contrast_name <- as.character(significant_domain$contrast[i])
|
||||
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
|
||||
if(length(contrast_parts) == 2) {
|
||||
level1 <- trimws(contrast_parts[1])
|
||||
level2 <- trimws(contrast_parts[2])
|
||||
|
||||
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
|
||||
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
|
||||
|
||||
if(length(data1) > 0 && length(data2) > 0) {
|
||||
domain_cohens_d <- cohen.d(data1, data2)
|
||||
cat(sprintf("Comparison: %s\n", contrast_name))
|
||||
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
|
||||
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
|
||||
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
|
||||
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
|
||||
cat("\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Two-Way Interactions
|
||||
|
||||
```{r cohens-d-function}
|
||||
# Report Cohen's d (computed from raw data) for every significant pairwise
# comparison in an emmeans pairs() data frame.
#
# pairs_df:     as.data.frame(pairs(...)) result with $contrast and $p.value.
# data:         raw long-format data frame.
# group1_var:   column whose levels appear in the contrast strings.
# group2_var:   optional stratifying column (used when present in pairs_df).
# response_var: numeric response column.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) == 0) {
    cat("No significant pairwise comparisons found.\n")
    return(invisible(NULL))
  }

  cat("Significant pairwise comparisons (p < 0.05):\n")
  print(significant_pairs)

  cat("\nCohen's d calculated from raw data:\n")

  # Whether the pairs table carries a stratifying column (simple effects)
  stratified <- group2_var %in% colnames(significant_pairs)

  for(row_idx in seq_len(nrow(significant_pairs))) {
    comparison <- significant_pairs[row_idx, ]
    contrast_name <- as.character(comparison$contrast)

    # Contrast strings look like "LevelA - LevelB"
    pieces <- strsplit(contrast_name, " - ")[[1]]
    if(length(pieces) != 2) next

    lhs <- trimws(pieces[1])
    rhs <- trimws(pieces[2])

    # Pull the raw observations for both sides of the contrast
    if(stratified) {
      group2_level <- as.character(comparison[[group2_var]])
      in_stratum <- data[[group2_var]] == group2_level
      data1 <- data[[response_var]][data[[group1_var]] == lhs & in_stratum]
      data2 <- data[[response_var]][data[[group1_var]] == rhs & in_stratum]
    } else {
      data1 <- data[[response_var]][data[[group1_var]] == lhs]
      data2 <- data[[response_var]][data[[group1_var]] == rhs]
    }

    if(length(data1) == 0 || length(data2) == 0) next

    # Cohen's d via the effsize package
    cohens_d_result <- cohen.d(data1, data2)

    cat(sprintf("Comparison: %s", contrast_name))
    if(stratified) {
      cat(sprintf(" | %s", group2_level))
    }
    cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
    cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
    cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
    cat(sprintf(" p-value: %.5f\n", comparison$p.value))
    cat("\n")
  }
  invisible(NULL)
}
|
||||
```
|
||||
|
||||
```{r interaction-effects}
|
||||
# Note: These sections would need the actual simple effects results from your analysis
|
||||
# The original script references undefined variables: temporal_time_simple and time_domain_simple
|
||||
# These would need to be calculated using emmeans for simple effects
|
||||
|
||||
# 1. TEMPORAL_DO × TIME INTERACTION
|
||||
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
|
||||
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
|
||||
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
|
||||
|
||||
# 2. TIME × DOMAIN INTERACTION
|
||||
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
|
||||
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
|
||||
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
|
||||
```
|
||||
|
||||
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
normality_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
|
||||
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
homogeneity_time <- long_data_clean %>%
|
||||
group_by(DOMAIN) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
homogeneity_domain <- long_data_clean %>%
|
||||
group_by(TIME) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_domain)
|
||||
```
|
||||
|
||||
## Hartley's F-Max Test with Bootstrap Critical Values
|
||||
|
||||
```{r hartley-function}
|
||||
# Hartley's F-max ratio: largest group variance divided by the smallest,
# ignoring NA variances.
calculate_hartley_ratio <- function(variances) {
  largest <- max(variances, na.rm = TRUE)
  smallest <- min(variances, na.rm = TRUE)
  largest / smallest
}
|
||||
|
||||
# Bootstrap critical values for Hartley's F-max test of variance homogeneity.
#
# Args:
#   data:         data frame / tibble containing the grouping and response columns
#                 (typically supplied via dplyr::pick()).
#   group_var:    name of the grouping column (string).
#   response_var: name of the numeric response column (string).
#   n_iter:       number of bootstrap iterations (default 1000).
#
# Returns: list(observed_ratio, critical_95, n_valid_iterations).
# Stops with an error if no finite bootstrap ratios were produced.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group labels
  groups <- unique(data[[group_var]])

  # Observed per-group variances
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Guard against zero/NA variances so the observed ratio stays finite
  invalid <- observed_vars <= 0 | is.na(observed_vars)
  if (any(invalid)) {
    observed_vars[invalid] <- 1e-10
  }

  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Extract each group's response values once, as a plain numeric vector.
  # BUGFIX: the previous `data[rows, response_var]` subsetting returns a
  # one-column tibble (not a vector) when `data` is a tibble from pick(),
  # which breaks sample()/var() below; `[[` extraction is always a vector.
  group_data_list <- map(groups, ~ {
    vals <- data[[response_var]][data[[group_var]] == .x]
    vals[!is.na(vals)]
  })

  # Bootstrap: resample each group independently and record the F-max ratio
  for (i in seq_len(n_iter)) {
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Keep only finite ratios (degenerate resamples can yield NA/Inf)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||
```
|
||||
|
||||
```{r hartley-results}
|
||||
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||
|
||||
print(unique(long_data_clean$TEMPORAL_DO))
|
||||
print(table(long_data_clean$TEMPORAL_DO))
|
||||
|
||||
observed_temporal_ratios <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||
# Calculate F-max ratio
|
||||
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||
|
||||
print(observed_temporal_ratios)
|
||||
|
||||
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||
set.seed(123) # For reproducibility
|
||||
|
||||
hartley_temporal_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
mutate(
|
||||
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||
significant = observed_ratio > critical_95
|
||||
) %>%
|
||||
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||
|
||||
print(hartley_temporal_results)
|
||||
```
|
||||
|
||||
# Mixed ANOVA Analysis
|
||||
|
||||
## Design Balance Check
|
||||
|
||||
```{r design-check}
|
||||
# Check for complete cases
|
||||
complete_cases <- sum(complete.cases(long_data_clean))
|
||||
print(complete_cases)
|
||||
|
||||
# Check if design is balanced
|
||||
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
|
||||
if(all(design_balance %in% c(0, 1))) {
|
||||
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
|
||||
} else {
|
||||
print("Warning: Design is unbalanced")
|
||||
print(summary(as.vector(design_balance)))
|
||||
}
|
||||
```
|
||||
|
||||
## Mixed ANOVA with Sphericity Corrections
|
||||
|
||||
```{r mixed-anova}
# Mixed-design ANOVA via ezANOVA (type-III sums of squares, with automatic
# sphericity diagnostics for the within-subject factors).
#   Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
#   Within-subjects:  TIME (Past, Future) x DOMAIN (Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(
  data     = long_data_clean,
  dv       = MEAN_DIFFERENCE,
  wid      = pID,
  between  = TEMPORAL_DO,
  within   = .(TIME, DOMAIN),
  type     = 3,
  detailed = TRUE
)

# Pull out the ANOVA table and reset the row names so rows number sequentially.
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL
print(anova_output)
```

## Mauchly's Test for Sphericity

```{r mauchly-test}
# Mauchly's sphericity test as reported by ezANOVA.
print(mixed_anova_model[["Mauchly"]])
```

## Sphericity-Corrected Results

```{r sphericity-corrections}
# Greenhouse-Geisser (GG) and Huynh-Feldt (HF) corrections for the
# within-subject effects that can violate sphericity.
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom.
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA

  # IMPROVEMENT: hoist the effect lookup — the original recomputed
  # match(sphericity_corr$Effect, anova_table$Effect) seven times.
  eff_idx <- match(sphericity_corr$Effect, anova_table$Effect)

  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[eff_idx],
    Original_DFd = anova_table$DFd[eff_idx],
    GG_DFn = anova_table$DFn[eff_idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[eff_idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[eff_idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[eff_idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  # Between-subjects effects (no sphericity corrections needed).
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]

      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }

  # Within-subjects effects (sphericity corrections where applicable).

  # TIME main effect (2 levels, sphericity automatically satisfied).
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }

  # DOMAIN main effect (4 levels, needs sphericity correction).
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }

  # Interactions, reported with GG- and HF-corrected df and p-values.
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # BUG FIX: print() would render the two characters "\n" literally inside the
  # quoted string; cat() interprets the escape and writes a real newline.
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
```

# Effect Sizes (Cohen's d)

## Main Effects

```{r cohens-d-main}
# Fit an aov model (same factorial structure as the ezANOVA call) so that
# emmeans can compute estimated marginal means.
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)

# Main effect of TIME.
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)

# CONSISTENCY FIX: lowercase "bonferroni" — the canonical p.adjust.methods
# spelling and the spelling used by every other adjust= call in this document
# (the original had "Bonferroni" here only).
time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)

# Cohen's d for the TIME main effect, computed from the raw observations.
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
```
|
||||
|
||||
```{r cohens-d-domain}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)

# Cohen's d for every DOMAIN contrast that reached significance.
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(row_idx in seq_len(nrow(significant_domain))) {
    contrast_name <- as.character(significant_domain$contrast[row_idx])
    # Contrast labels look like "A - B"; anything else is skipped.
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) != 2) next
    lhs <- trimws(contrast_parts[1])
    rhs <- trimws(contrast_parts[2])

    data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == lhs]
    data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == rhs]
    if(length(data1) == 0 || length(data2) == 0) next

    domain_cohens_d <- cohen.d(data1, data2)
    cat(sprintf("Comparison: %s\n", contrast_name))
    cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
    cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
    cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
    cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[row_idx]))
    cat("\n")
  }
}
```

## Two-Way Interactions

```{r cohens-d-function}
# Cohen's d (via effsize::cohen.d) for every significant row of an emmeans
# pairwise-contrast table, pulling the raw observations back out of `data`.
#   pairs_df     - data frame of contrasts with columns `contrast` and `p.value`
#   data         - long-format data frame holding the raw observations
#   group1_var   - column whose levels appear in the contrast labels
#   group2_var   - optional conditioning column (present for simple effects)
#   response_var - column holding the dependent variable
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) == 0) {
    cat("No significant pairwise comparisons found.\n")
    return(invisible(NULL))
  }

  cat("Significant pairwise comparisons (p < 0.05):\n")
  print(significant_pairs)

  cat("\nCohen's d calculated from raw data:\n")

  for(row_idx in seq_len(nrow(significant_pairs))) {
    comparison <- significant_pairs[row_idx, ]
    contrast_name <- as.character(comparison$contrast)

    # Contrast labels look like "A - B"; anything else is skipped.
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) != 2) next

    lhs <- trimws(contrast_parts[1])
    rhs <- trimws(contrast_parts[2])

    # When the contrast table carries a conditioning column (simple effects),
    # restrict both samples to that level; otherwise compare marginally.
    has_condition <- group2_var %in% colnames(comparison)
    if(has_condition) {
      group2_level <- as.character(comparison[[group2_var]])
      in_condition <- data[[group2_var]] == group2_level
      data1 <- data[[response_var]][data[[group1_var]] == lhs & in_condition]
      data2 <- data[[response_var]][data[[group1_var]] == rhs & in_condition]
    } else {
      data1 <- data[[response_var]][data[[group1_var]] == lhs]
      data2 <- data[[response_var]][data[[group1_var]] == rhs]
    }

    if(length(data1) == 0 || length(data2) == 0) next

    cohens_d_result <- cohen.d(data1, data2)

    cat(sprintf("Comparison: %s", contrast_name))
    if(has_condition) {
      cat(sprintf(" | %s", group2_level))
    }
    cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
    cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
    cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
    cat(sprintf(" p-value: %.5f\n", comparison$p.value))
    cat("\n")
  }
}
```
|
||||
|
||||
```{r interaction-effects}
# Note: these sections would need the actual simple-effects results from the
# analysis. The original script referenced undefined variables
# (temporal_time_simple, time_domain_simple); compute them with emmeans simple
# effects before applying calculate_cohens_d_for_pairs(), e.g.:

# 1. TEMPORAL_DO x TIME interaction
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")

# 2. TIME x DOMAIN interaction
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
```

# Interaction Plot

```{r interaction-plot}
# Colour palette for DOMAIN (only the first four entries are used — DOMAIN has
# four levels; the fifth is spare).
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")

# Display order for the TIME axis.
time_levels <- c("Past", "Future")

# Estimated marginal means for every DOMAIN x TIME cell.
emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)

# Tidy the emmeans output: drop cells without a complete CI and rename the
# columns the plot layers expect.
emmeans_data2 <- emm_full %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(ci_lower = lower.CL, ci_upper = upper.CL, plot_mean = emmean) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )

# Raw observations, with the same factor ordering as the summary layer.
iPlot <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )

# Layers, bottom to top: jittered raw points, CI rectangles, CI whiskers,
# EMM markers. DOMAINs are dodged horizontally around each TIME position by
# (as.numeric(DOMAIN) - 2.5) * 0.15.
interaction_plot2 <- ggplot() +
  geom_point(
    data = iPlot,
    aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
    alpha = 0.3, shape = 16
  ) +
  geom_rect(
    data = emmeans_data2,
    aes(
      xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
      xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
      ymin = ci_lower, ymax = ci_upper,
      fill = DOMAIN
    ),
    color = "black", alpha = 0.5
  ) +
  geom_segment(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      y = ci_lower, yend = ci_upper
    ),
    color = "black"
  ) +
  geom_point(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      y = plot_mean,
      color = DOMAIN,
      shape = DOMAIN
    ),
    size = 2.5, stroke = 0.8, fill = "black"
  ) +
  labs(
    x = "TIME", y = "Mean Difference",
    title = "DOMAIN × TIME Interaction", subtitle = ""
  ) +
  scale_color_manual(name = "DOMAIN", values = cbp1) +
  scale_fill_manual(name = "DOMAIN", values = cbp1) +
  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.title = element_text(size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5)
  )

print(interaction_plot2)
```
|
||||
|
||||
@ -0,0 +1,660 @@
|
||||
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
  html_document:
    toc: true
    toc_float: true
    code_folding: hide
    theme: flatly
    highlight: tango
    fig_width: 10
    fig_height: 6
---

```{r setup, include=FALSE}
# Global knitr options: hide code, suppress warnings and messages in the report.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```

# Introduction

This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).

# Data Preparation and Setup

```{r libraries}
library(tidyverse)
library(ez)        # ezANOVA
library(car)       # leveneTest
library(nortest)   # Anderson-Darling normality test (ad.test)
library(emmeans)   # estimated marginal means / post-hoc comparisons
library(purrr)     # map / map_dbl
library(effsize)   # cohen.d
library(ggplot2)   # plotting

options(scipen = 999)                               # suppress scientific notation
options(contrasts = c("contr.sum", "contr.poly"))   # type-III-friendly contrasts

# NOTE(review): in knitr, setwd() only affects the current chunk — later
# chunks revert to the document directory. If read.csv("exp1.csv") works, the
# Rmd presumably already lives next to the data; consider
# knitr::opts_knit$set(root.dir = ...) in the setup chunk instead. TODO confirm.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
|
||||
|
||||
```{r data-loading}
# Read the raw wide-format data.
data <- read.csv("exp1.csv")

# The eight domain-mean columns that will be reshaped to long format.
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Lookup table mapping each wide column to its TIME and DOMAIN labels.
domain_mapping <- data.frame(
  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)
```
|
||||
|
||||
```{r data-reshaping}
# Wide -> long: one row per participant x TIME x DOMAIN cell.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(domain_mapping, by = "variable") %>%
  # Promote the mapping's 'time'/'domain' labels to properly ordered factors.
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Keep the analysis columns and drop rows with a missing response.
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))

# Alias used by all downstream chunks.
long_data_clean <- long_data
```

# Descriptive Statistics

## Overall Descriptive Statistics by TIME and DOMAIN

```{r descriptive-stats}
# Per TIME x DOMAIN cell: sample size, centre, spread and range (5 d.p.).
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
    q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
    min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print(desc_stats)
```

## Descriptive Statistics by Between-Subjects Factors

```{r descriptive-stats-temporal}
# Same summaries, additionally split by the between-subjects factor.
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print(desc_stats_by_temporal)
```

# Assumption Testing

## Missing Values Check

```{r missing-values}
# Missing-value counts per TIME x DOMAIN cell.
# NOTE(review): long_data was already filtered with !is.na(MEAN_DIFFERENCE)
# during reshaping, so n_missing is 0 by construction here; auditing true
# missingness would require running this on the wide data before filtering.
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )

print(missing_summary)
```

## Outlier Detection

```{r outlier-detection}
# Tukey fences (1.5 * IQR) per TIME x DOMAIN cell. summarise() lets later
# columns reference earlier ones, so iqr and the bounds build on q1/q3.
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    median = median(MEAN_DIFFERENCE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print(outlier_summary)
```

## Anderson-Darling Normality Test

```{r normality-test}
# Anderson-Darling normality test per TIME x DOMAIN cell.
# IMPROVEMENT: run ad.test() once per cell and unpack both the statistic and
# the p-value — the original invoked the test twice per group.
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad, ~ .x$p.value)
  ) %>%
  select(TIME, DOMAIN, n, ad_statistic, ad_p_value)

print(normality_results)
```

## Homogeneity of Variance (Levene's Test)

### Test homogeneity across TIME within each DOMAIN

```{r homogeneity-time}
# Levene's test for homogeneity of variance across TIME within each DOMAIN.
# BUG FIX: the original called leveneTest(MEAN_DIFFERENCE ~ TIME) inside
# summarise() with no `data` argument, so the formula terms were not reliably
# resolved against the current group's rows. Pass the group explicitly via
# pick(), and run the test once per group instead of twice.
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything()))),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(DOMAIN, levene_F, levene_p)

print(homogeneity_time)
```

### Test homogeneity across DOMAIN within each TIME

```{r homogeneity-domain}
# Levene's test for homogeneity of variance across DOMAIN within each TIME.
# BUG FIX: as in the TIME chunk, supply the current group's rows explicitly
# via pick() instead of relying on formula-environment lookup inside
# summarise(), and run the test once per group instead of twice.
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ DOMAIN, data = pick(everything()))),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(TIME, levene_F, levene_p)

print(homogeneity_domain)
```

## Hartley's F-Max Test with Bootstrap Critical Values

```{r hartley-function}
# Hartley's F-max ratio: largest group variance over smallest.
calculate_hartley_ratio <- function(variances) {
  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}

# Bootstrap critical value for Hartley's F-max test.
#   data         - data frame (or tibble) with one row per observation
#   group_var    - name of the grouping column
#   response_var - name of the numeric response column
#   n_iter       - number of bootstrap resamples
# Returns a list: observed_ratio, critical_95 (bootstrap 95th percentile),
# n_valid_iterations.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get unique groups.
  groups <- unique(data[[group_var]])

  # Observed per-group variances.
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Guard against zero/NA variances so the ratio stays finite.
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }

  # Observed F-max ratio.
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios.
  bootstrap_ratios <- numeric(n_iter)

  # BUG FIX: the original used data[rows, response_var], which returns a
  # one-column tibble (not a vector) when `data` is a tibble — e.g. when this
  # function is called on pick() output — breaking the NA filter and var().
  # [[ ]] extraction always yields the underlying vector.
  group_data_list <- map(groups, ~ {
    group_values <- data[[response_var]][data[[group_var]] == .x]
    group_values[!is.na(group_values)]
  })

  # Resample each group independently and record the F-max ratio.
  for(i in 1:n_iter) {
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Drop degenerate resamples (zero variance yields Inf/NaN ratios).
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value: 95th percentile of the bootstrap distribution.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only the essential information.
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
```
|
||||
|
||||
```{r hartley-results}
# For the mixed ANOVA we check homogeneity across the BETWEEN-subjects factor
# (TEMPORAL_DO) within every TIME x DOMAIN combination.

print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))

# Observed per-cell variances for the two TEMPORAL_DO levels and their F-max ratio.
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)

# Bootstrap Hartley's F-max test across TEMPORAL_DO within each TIME x DOMAIN cell.
set.seed(123)  # reproducible bootstrap

hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, DOMAIN, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
```

# Mixed ANOVA Analysis

## Design Balance Check

```{r design-check}
# How many rows have no missing values at all?
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)

# Cross-tabulate participants against the within-subject cells; in a balanced
# design every pID x TIME x DOMAIN cell holds at most one observation.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
is_balanced <- all(design_balance %in% c(0, 1))
if (is_balanced) {
  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
  print("Warning: Design is unbalanced")
  print(summary(as.vector(design_balance)))
}
```

## Mixed ANOVA with Sphericity Corrections

```{r mixed-anova}
# Mixed-design ANOVA via ezANOVA (type-III sums of squares, with automatic
# sphericity diagnostics for the within-subject factors).
#   Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
#   Within-subjects:  TIME (Past, Future) x DOMAIN (Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(
  data     = long_data_clean,
  dv       = MEAN_DIFFERENCE,
  wid      = pID,
  between  = TEMPORAL_DO,
  within   = .(TIME, DOMAIN),
  type     = 3,
  detailed = TRUE
)

# Pull out the ANOVA table and reset the row names so rows number sequentially.
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL
print(anova_output)
```
|
||||
|
||||
## Mauchly's Test for Sphericity
|
||||
|
||||
```{r mauchly-test}
|
||||
print(mixed_anova_model$Mauchly)
|
||||
```
|
||||
|
||||
## Sphericity-Corrected Results
|
||||
|
||||
```{r sphericity-corrections}
|
||||
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
|
||||
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
|
||||
print(mixed_anova_model$`Sphericity Corrections`)
|
||||
|
||||
# Extract and display corrected degrees of freedom
|
||||
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
|
||||
anova_table <- mixed_anova_model$ANOVA
|
||||
|
||||
corrected_df <- data.frame(
|
||||
Effect = sphericity_corr$Effect,
|
||||
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
GG_epsilon = sphericity_corr$GGe,
|
||||
HF_epsilon = sphericity_corr$HFe
|
||||
)
|
||||
|
||||
print(corrected_df)
|
||||
|
||||
# Between-subjects effects (no sphericity corrections needed)
|
||||
between_effects <- c("TEMPORAL_DO")
|
||||
for(effect in between_effects) {
|
||||
if(effect %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == effect]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == effect]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == effect]
|
||||
p_value <- anova_table$p[anova_table$Effect == effect]
|
||||
|
||||
cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
|
||||
}
|
||||
}
|
||||
|
||||
# Within-subjects effects (sphericity corrections where applicable)
|
||||
|
||||
# TIME main effect (2 levels, sphericity automatically satisfied)
|
||||
if("TIME" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "TIME"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "TIME"]
|
||||
cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# DOMAIN main effect (4 levels, needs sphericity correction)
|
||||
if("DOMAIN" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
|
||||
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# Interactions with sphericity corrections
|
||||
for(i in seq_len(nrow(corrected_df))) {
|
||||
effect <- corrected_df$Effect[i]
|
||||
f_value <- anova_table$F[match(effect, anova_table$Effect)]
|
||||
|
||||
cat(sprintf("\n%s:\n", effect))
|
||||
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
|
||||
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
|
||||
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
|
||||
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
|
||||
}
|
||||
} else {
|
||||
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
|
||||
}
|
||||
```
|
||||
|
||||
# Effect Sizes (Cohen's d)
|
||||
|
||||
## Main Effects
|
||||
|
||||
```{r cohens-d-main}
|
||||
# Create aov model for emmeans
|
||||
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
|
||||
data = long_data_clean)
|
||||
|
||||
# Main Effect of TIME
|
||||
time_emmeans <- emmeans(aov_model, ~ TIME)
|
||||
print(time_emmeans)
|
||||
|
||||
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
|
||||
time_main_df <- as.data.frame(time_main_contrast)
|
||||
print(time_main_df)
|
||||
|
||||
# Calculate Cohen's d for TIME main effect
|
||||
if(nrow(time_main_df) > 0) {
|
||||
cat("\nCohen's d for TIME main effect:\n")
|
||||
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
|
||||
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
|
||||
|
||||
time_cohens_d <- cohen.d(time_past_data, time_future_data)
|
||||
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
|
||||
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
|
||||
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
|
||||
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
|
||||
}
|
||||
```
|
||||
|
||||
```{r cohens-d-domain}
|
||||
# Main Effect of DOMAIN (significant: p < 0.001)
|
||||
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
|
||||
print(domain_emmeans)
|
||||
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
|
||||
domain_main_df <- as.data.frame(domain_main_contrast)
|
||||
print(domain_main_df)
|
||||
|
||||
# Calculate Cohen's d for significant DOMAIN contrasts
|
||||
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
|
||||
if(nrow(significant_domain) > 0) {
|
||||
cat("\nCohen's d for significant DOMAIN contrasts:\n")
|
||||
for(i in seq_len(nrow(significant_domain))) {
|
||||
contrast_name <- as.character(significant_domain$contrast[i])
|
||||
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
|
||||
if(length(contrast_parts) == 2) {
|
||||
level1 <- trimws(contrast_parts[1])
|
||||
level2 <- trimws(contrast_parts[2])
|
||||
|
||||
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
|
||||
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
|
||||
|
||||
if(length(data1) > 0 && length(data2) > 0) {
|
||||
domain_cohens_d <- cohen.d(data1, data2)
|
||||
cat(sprintf("Comparison: %s\n", contrast_name))
|
||||
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
|
||||
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
|
||||
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
|
||||
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
|
||||
cat("\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Two-Way Interactions
|
||||
|
||||
```{r cohens-d-function}
|
||||
# Function to calculate Cohen's d for pairwise comparisons
|
||||
# Report Cohen's d (via effsize::cohen.d) for every significant row of an
# emmeans pairwise-contrast table, pulling the raw observations from `data`.
#
# pairs_df     : data.frame of contrasts with `contrast` and `p.value` columns
# data         : raw long-format data the contrasts were computed from
# group1_var   : column of `data` whose levels appear in the contrast labels
# group2_var   : conditioning column; used only if it appears in pairs_df
# response_var : column of `data` holding the response values
#
# Called for its console output only; returns invisible NULL.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  sig <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(sig) == 0) {
    cat("No significant pairwise comparisons found.\n")
    return(invisible(NULL))
  }

  cat("Significant pairwise comparisons (p < 0.05):\n")
  print(sig)

  cat("\nCohen's d calculated from raw data:\n")

  for(row_idx in seq_len(nrow(sig))) {
    row <- sig[row_idx, ]
    label <- as.character(row$contrast)

    # Contrast labels look like "A - B"; recover the two factor levels.
    parts <- strsplit(label, " - ")[[1]]
    if(length(parts) != 2) next

    lvl_a <- trimws(parts[1])
    lvl_b <- trimws(parts[2])

    # If the contrast table carries the conditioning variable, restrict the
    # raw data to that stratum; otherwise compare across all rows.
    conditioned <- group2_var %in% colnames(row)
    if(conditioned) {
      stratum <- as.character(row[[group2_var]])
      keep_a <- data[[group1_var]] == lvl_a & data[[group2_var]] == stratum
      keep_b <- data[[group1_var]] == lvl_b & data[[group2_var]] == stratum
    } else {
      keep_a <- data[[group1_var]] == lvl_a
      keep_b <- data[[group1_var]] == lvl_b
    }
    vals_a <- data[[response_var]][keep_a]
    vals_b <- data[[response_var]][keep_b]

    if(length(vals_a) == 0 || length(vals_b) == 0) next

    # Cohen's d from the effsize package.
    d <- cohen.d(vals_a, vals_b)

    cat(sprintf("Comparison: %s", label))
    if(conditioned) {
      cat(sprintf(" | %s", stratum))
    }
    cat(sprintf("\n n1 = %d, n2 = %d\n", length(vals_a), length(vals_b)))
    cat(sprintf(" Cohen's d: %.5f\n", d$estimate))
    cat(sprintf(" Effect size interpretation: %s\n", d$magnitude))
    cat(sprintf(" p-value: %.5f\n", row$p.value))
    cat("\n")
  }
}
|
||||
```
|
||||
|
||||
```{r interaction-effects}
|
||||
# Note: These sections would need the actual simple effects results from your analysis
|
||||
# The original script references undefined variables: temporal_time_simple and time_domain_simple
|
||||
# These would need to be calculated using emmeans for simple effects
|
||||
|
||||
# 1. TEMPORAL_DO × TIME INTERACTION
|
||||
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
|
||||
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
|
||||
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
|
||||
|
||||
# 2. TIME × DOMAIN INTERACTION
|
||||
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
|
||||
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
|
||||
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
|
||||
```
|
||||
|
||||
# Interaction Plot
|
||||
|
||||
```{r interaction-plot}
|
||||
# Define color palette for DOMAIN (4 levels)
|
||||
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
|
||||
|
||||
# Define TIME levels (Past, Future order)
|
||||
time_levels <- c("Past", "Future")
|
||||
|
||||
# Create estimated marginal means for DOMAIN x TIME
|
||||
emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)
|
||||
|
||||
# Prepare emmeans data frame
|
||||
emmeans_data2 <- emm_full %>%
|
||||
as.data.frame() %>%
|
||||
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
|
||||
rename(
|
||||
ci_lower = lower.CL,
|
||||
ci_upper = upper.CL,
|
||||
plot_mean = emmean
|
||||
) %>%
|
||||
mutate(
|
||||
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
TIME = factor(TIME, levels = time_levels)
|
||||
)
|
||||
|
||||
iPlot <- long_data_clean %>%
|
||||
dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
|
||||
mutate(
|
||||
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
TIME = factor(TIME, levels = time_levels)
|
||||
)
|
||||
|
||||
# Plot without TEMPORAL_DO facet
|
||||
interaction_plot2 <- ggplot() +
|
||||
geom_point(
|
||||
data = iPlot,
|
||||
aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
|
||||
position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
|
||||
alpha = 0.3, shape = 16
|
||||
) +
|
||||
geom_rect(
|
||||
data = emmeans_data2,
|
||||
aes(
|
||||
xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
ymin = ci_lower, ymax = ci_upper,
|
||||
fill = DOMAIN
|
||||
),
|
||||
color = "black", alpha = 0.5
|
||||
) +
|
||||
geom_segment(
|
||||
data = emmeans_data2,
|
||||
aes(
|
||||
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
y = ci_lower, yend = ci_upper
|
||||
),
|
||||
color = "black"
|
||||
) +
|
||||
geom_point(
|
||||
data = emmeans_data2,
|
||||
aes(
|
||||
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
y = plot_mean,
|
||||
color = DOMAIN,
|
||||
shape = DOMAIN
|
||||
),
|
||||
size = 2.5, stroke = 0.8, fill = "black"
|
||||
) +
|
||||
labs(
|
||||
x = "TIME", y = "Mean Difference",
|
||||
title = "DOMAIN × TIME Interaction", subtitle = ""
|
||||
) +
|
||||
scale_color_manual(name = "DOMAIN", values = cbp1) +
|
||||
scale_fill_manual(name = "DOMAIN", values = cbp1) +
|
||||
scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
|
||||
theme_minimal() +
|
||||
theme(
|
||||
axis.text.x = element_text(angle = 0, hjust = 0.5),
|
||||
plot.title = element_text(size = 14, hjust = 0.5),
|
||||
plot.subtitle = element_text(size = 12, hjust = 0.5)
|
||||
)
|
||||
|
||||
print(interaction_plot2)
|
||||
```
|
||||
|
||||
@ -0,0 +1,660 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
|
||||
library(ez)
|
||||
library(car)
|
||||
library(nortest) # For normality tests
|
||||
library(emmeans) # For post-hoc comparisons
|
||||
library(purrr) # For map functions
|
||||
library(effsize) # For Cohen's d calculations
|
||||
library(ggplot2) # For plotting
|
||||
|
||||
options(scipen = 999)
|
||||
options(contrasts = c("contr.sum", "contr.poly"))
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
|
||||
|
||||
# Define domain mapping
|
||||
domain_mapping <- data.frame(
|
||||
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
|
||||
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
|
||||
time = c(rep("Past", 4), rep("Future", 4)),
|
||||
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
|
||||
stringsAsFactors = FALSE
|
||||
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
long_data <- data %>%
|
||||
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
|
||||
pivot_longer(
|
||||
cols = all_of(required_vars),
|
||||
names_to = "variable",
|
||||
values_to = "MEAN_DIFFERENCE"
|
||||
) %>%
|
||||
left_join(domain_mapping, by = "variable") %>%
|
||||
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
|
||||
mutate(
|
||||
TIME = factor(time, levels = c("Past", "Future")),
|
||||
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
pID = as.factor(pID),
|
||||
TEMPORAL_DO = as.factor(TEMPORAL_DO)
|
||||
) %>%
|
||||
# Select final columns and remove any rows with missing values
|
||||
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
|
||||
filter(!is.na(MEAN_DIFFERENCE))
|
||||
|
||||
# Create clean dataset for analysis (fixing the reference issue)
|
||||
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
desc_stats <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
|
||||
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
|
||||
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
desc_stats_by_temporal <- long_data %>%
|
||||
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
missing_summary <- long_data %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n_total = n(),
|
||||
n_missing = sum(is.na(MEAN_DIFFERENCE)),
|
||||
pct_missing = round(100 * n_missing / n_total, 2),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
outlier_summary <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
mean = mean(MEAN_DIFFERENCE),
|
||||
sd = sd(MEAN_DIFFERENCE),
|
||||
q1 = quantile(MEAN_DIFFERENCE, 0.25),
|
||||
median = median(MEAN_DIFFERENCE),
|
||||
q3 = quantile(MEAN_DIFFERENCE, 0.75),
|
||||
iqr = q3 - q1,
|
||||
lower_bound = q1 - 1.5 * iqr,
|
||||
upper_bound = q3 + 1.5 * iqr,
|
||||
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
normality_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
n = n(),
|
||||
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
|
||||
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
homogeneity_time <- long_data_clean %>%
|
||||
group_by(DOMAIN) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
homogeneity_domain <- long_data_clean %>%
|
||||
group_by(TIME) %>%
|
||||
summarise(
|
||||
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
|
||||
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
|
||||
.groups = 'drop'
|
||||
)
|
||||
|
||||
print(homogeneity_domain)
|
||||
```
|
||||
|
||||
## Hartley's F-Max Test with Bootstrap Critical Values
|
||||
|
||||
```{r hartley-function}
|
||||
# Function to calculate Hartley's F-max ratio
|
||||
# Hartley's F-max statistic: ratio of the largest to the smallest group
# variance. Values near 1 indicate homogeneous variances. NAs are ignored.
calculate_hartley_ratio <- function(variances) {
  extremes <- range(variances, na.rm = TRUE)
  extremes[2] / extremes[1]
}
|
||||
|
||||
# More efficient bootstrap function for Hartley's F-max test
|
||||
# Bootstrap critical value for Hartley's F-max homogeneity-of-variance test.
#
# data         : data.frame/tibble containing the grouping and response columns
# group_var    : name (string) of the grouping column
# response_var : name (string) of the numeric response column
# n_iter       : number of bootstrap iterations (default 1000)
#
# Returns a list with:
#   observed_ratio     - observed max/min group-variance ratio
#   critical_95        - 95th percentile of the bootstrap ratio distribution
#   n_valid_iterations - number of finite bootstrap ratios retained
#
# The caller is responsible for set.seed() if reproducibility is required.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group labels, in order of appearance
  groups <- unique(data[[group_var]])

  # Observed per-group variances of the response
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)

  # Guard against zero/NA variances so the ratio below stays finite.
  bad_vars <- observed_vars <= 0 | is.na(observed_vars)
  if(any(bad_vars)) {
    observed_vars[bad_vars] <- 1e-10
  }

  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Extract each group's non-missing response values once, as plain vectors.
  # BUGFIX: the previous `data[rows, response_var]` form returns a one-column
  # tibble (not an atomic vector) when `data` is a tibble — which it is at the
  # call site via pick() — breaking sample()/var() below. `[[` indexing always
  # yields an atomic vector for both data.frames and tibbles.
  group_data_list <- map(groups, ~ {
    group_data <- data[[response_var]][data[[group_var]] == .x]
    group_data[!is.na(group_data)]
  })

  # Resample each group independently and record the variance ratio.
  for(i in seq_len(n_iter)) {
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Drop non-finite ratios (e.g. a resample that happened to have zero variance).
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # 95th percentile of the bootstrap distribution is the critical value.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||
```
|
||||
|
||||
```{r hartley-results}
|
||||
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
|
||||
# within each combination of within-subjects factors (TIME × DOMAIN)
|
||||
|
||||
print(unique(long_data_clean$TEMPORAL_DO))
|
||||
print(table(long_data_clean$TEMPORAL_DO))
|
||||
|
||||
observed_temporal_ratios <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
|
||||
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
|
||||
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
|
||||
# Calculate F-max ratio
|
||||
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
|
||||
|
||||
print(observed_temporal_ratios)
|
||||
|
||||
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
|
||||
set.seed(123) # For reproducibility
|
||||
|
||||
hartley_temporal_results <- long_data_clean %>%
|
||||
group_by(TIME, DOMAIN) %>%
|
||||
summarise(
|
||||
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
|
||||
.groups = 'drop'
|
||||
) %>%
|
||||
mutate(
|
||||
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
|
||||
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
|
||||
significant = observed_ratio > critical_95
|
||||
) %>%
|
||||
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
|
||||
|
||||
print(hartley_temporal_results)
|
||||
```
|
||||
|
||||
# Mixed ANOVA Analysis
|
||||
|
||||
## Design Balance Check
|
||||
|
||||
```{r design-check}
|
||||
# Check for complete cases
|
||||
complete_cases <- sum(complete.cases(long_data_clean))
|
||||
print(complete_cases)
|
||||
|
||||
# Check if design is balanced
|
||||
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
|
||||
if(all(design_balance %in% c(0, 1))) {
|
||||
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
|
||||
} else {
|
||||
print("Warning: Design is unbalanced")
|
||||
print(summary(as.vector(design_balance)))
|
||||
}
|
||||
```
|
||||
|
||||
## Mixed ANOVA with Sphericity Corrections
|
||||
|
||||
```{r mixed-anova}
|
||||
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 requests Type III sums of squares (contr.sum is set globally in
# the libraries chunk, which Type III requires); detailed = TRUE requests
# the expanded output table.
mixed_anova_model <- ezANOVA(data = long_data_clean,
                             dv = MEAN_DIFFERENCE,
                             wid = pID,
                             between = TEMPORAL_DO,
                             within = .(TIME, DOMAIN),
                             type = 3,
                             detailed = TRUE)

# The ANOVA table itself; sibling list elements ($Mauchly,
# $`Sphericity Corrections`) are consumed by later chunks
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
|
||||
```
|
||||
|
||||
## Mauchly's Test for Sphericity
|
||||
|
||||
```{r mauchly-test}
|
||||
# Mauchly's sphericity test from ezANOVA; relevant to within-subjects
# effects with more than 2 levels (DOMAIN and its interactions here)
print(mixed_anova_model$Mauchly)
|
||||
```
|
||||
|
||||
## Sphericity-Corrected Results
|
||||
|
||||
```{r sphericity-corrections}
|
||||
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt).
# ezANOVA only returns this element when corrections apply, hence the guard.
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)

  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA

  # Row positions of the corrected effects inside the full ANOVA table,
  # computed once instead of repeating match() for every column (the
  # original recomputed the same match() eight times)
  eff_idx <- match(sphericity_corr$Effect, anova_table$Effect)

  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[eff_idx],
    Original_DFd = anova_table$DFd[eff_idx],
    GG_DFn = anova_table$DFn[eff_idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[eff_idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[eff_idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[eff_idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )

  print(corrected_df)

  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]

      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }

  # Within-subjects effects (sphericity corrections where applicable)

  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }

  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }

  # Effects in the correction table: report original, GG- and HF-corrected
  # F tests side by side. Corrected dfs are fractional, hence %.2f not %d.
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[eff_idx[i]]

    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # cat() (not print()) so "\n" renders as a newline instead of literally
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
|
||||
```
|
||||
|
||||
# Effect Sizes (Cohen's d)
|
||||
|
||||
## Main Effects
|
||||
|
||||
```{r cohens-d-main}
|
||||
# Create aov model for emmeans post-hoc comparisons: same fixed effects and
# within-subject error strata as the ezANOVA model above
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)

# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)

# adjust = "bonferroni" in lower case, consistent with the DOMAIN and
# interaction chunks (the original used "Bonferroni" only here)
time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)

# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]

  # NOTE(review): cohen.d() here treats Past and Future as independent
  # samples, but TIME is within-subjects — confirm a paired d is not intended
  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
|
||||
```
|
||||
|
||||
```{r cohens-d-domain}
|
||||
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)

# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    # emmeans labels contrasts "levelA - levelB"; split into the two levels
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])

      # Raw observations for each domain level (pooled over TIME and
      # TEMPORAL_DO)
      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]

      if(length(data1) > 0 && length(data2) > 0) {
        # NOTE(review): cohen.d() treats the two samples as independent;
        # DOMAIN is within-subjects — confirm this is intended
        domain_cohens_d <- cohen.d(data1, data2)
        cat(sprintf("Comparison: %s\n", contrast_name))
        cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
        cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
        cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
        cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
        cat("\n")
      }
    }
  }
}
|
||||
```
|
||||
|
||||
## Two-Way Interactions
|
||||
|
||||
```{r cohens-d-function}
|
||||
# Function to calculate Cohen's d for pairwise comparisons.
#
# Args:
#   pairs_df:     data.frame of emmeans pairwise contrasts; must contain
#                 `contrast` and `p.value` columns, and may contain a column
#                 named after group2_var when contrasts are conditioned
#   data:         long-format data frame holding the raw observations
#   group1_var:   column whose levels appear in the "levelA - levelB"
#                 contrast strings
#   group2_var:   conditioning column; used only if present in pairs_df
#   response_var: numeric response column
#
# Side effects: prints significant contrasts and their Cohen's d via
# cat()/print(); no meaningful return value.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]

  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)

    cat("\nCohen's d calculated from raw data:\n")

    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)

      # Parse the contrast label "levelA - levelB" into its two levels
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])

        # Get raw data for both conditions; when the contrasts are
        # conditioned (simple effects), also subset on the group2 level
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]

          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }

        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          # NOTE(review): samples are treated as independent here even for
          # within-subjects factors — confirm this is intended
          cohens_d_result <- cohen.d(data1, data2)

          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf(" p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
|
||||
```
|
||||
|
||||
```{r interaction-effects}
|
||||
# Note: These sections would need the actual simple effects results from your analysis
|
||||
# The original script references undefined variables: temporal_time_simple and time_domain_simple
|
||||
# These would need to be calculated using emmeans for simple effects
|
||||
|
||||
# 1. TEMPORAL_DO × TIME INTERACTION
|
||||
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
|
||||
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
|
||||
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
|
||||
|
||||
# 2. TIME × DOMAIN INTERACTION
|
||||
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
|
||||
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
|
||||
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
|
||||
```
|
||||
|
||||
# Interaction Plot
|
||||
|
||||
```{r interaction-plot}
|
||||
# Define color palette for DOMAIN (4 levels used; 5th color is spare)
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")

# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")

# Create estimated marginal means for DOMAIN x TIME
emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)

# Prepare emmeans data frame: drop rows lacking CI bounds or an estimate,
# rename to plotting-friendly names, and fix factor level order
emmeans_data2 <- emm_full %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )

# Raw observations for the jittered point layer
iPlot <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )

# Plot without TEMPORAL_DO facet.
# The (as.numeric(DOMAIN) - 2.5) * 0.15 offsets hand-dodge the four DOMAIN
# levels around each TIME position (levels 1..4 average 2.5, centering them).
interaction_plot2 <- ggplot() +
  # raw data, jitter-dodged within each TIME position
  geom_point(
    data = iPlot,
    aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
    alpha = 0.3, shape = 16
  ) +
  # confidence-interval box around each estimated marginal mean
  geom_rect(
    data = emmeans_data2,
    aes(
      xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
      xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
      ymin = ci_lower, ymax = ci_upper,
      fill = DOMAIN
    ),
    color = "black", alpha = 0.5
  ) +
  # vertical CI line through each box
  geom_segment(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      y = ci_lower, yend = ci_upper
    ),
    color = "black"
  ) +
  # estimated marginal mean markers
  geom_point(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      y = plot_mean,
      color = DOMAIN,
      shape = DOMAIN
    ),
    size = 2.5, stroke = 0.8, fill = "black"
  ) +
  labs(
    x = "TIME", y = "Mean Difference",
    title = "DOMAIN × TIME Interaction", subtitle = ""
  ) +
  scale_color_manual(name = "DOMAIN", values = cbp1) +
  scale_fill_manual(name = "DOMAIN", values = cbp1) +
  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.title = element_text(size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5)
  )

print(interaction_plot2)
|
||||
```
|
||||
|
||||
@ -0,0 +1,660 @@
|
||||
---
|
||||
title: "Mixed ANOVA Analysis for Domain Means"
|
||||
author: "Irina"
|
||||
date: "`r Sys.Date()`"
|
||||
output:
|
||||
html_document:
|
||||
toc: true
|
||||
toc_float: true
|
||||
code_folding: hide
|
||||
theme: flatly
|
||||
highlight: tango
|
||||
fig_width: 10
|
||||
fig_height: 6
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
# Global chunk options: hide code, suppress warnings and messages in the report
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
|
||||
```
|
||||
|
||||
# Introduction
|
||||
|
||||
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
|
||||
|
||||
# Data Preparation and Setup
|
||||
|
||||
```{r libraries}
|
||||
library(tidyverse)
library(ez)       # ezANOVA
library(car)      # leveneTest
library(nortest)  # For normality tests (ad.test)
library(emmeans)  # For post-hoc comparisons
library(purrr)    # For map functions
library(effsize)  # For Cohen's d calculations (cohen.d)
library(ggplot2)  # For plotting

options(scipen = 999)                             # suppress scientific notation
options(contrasts = c("contr.sum", "contr.poly")) # required for Type III SS

# Guard the user-specific path so the document still knits on other machines
# instead of stopping with an error.
# NOTE(review): inside R Markdown, setwd() only affects the current chunk;
# use knitr::opts_knit$set(root.dir = ...) in the setup chunk if later
# chunks must run from this directory.
project_dir <- "C:/Users/irina/Documents/DND/EOHI/eohi1"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
|
||||
```
|
||||
|
||||
```{r data-loading}
|
||||
# Read the data
data <- read.csv("exp1.csv")

# The eight domain-mean columns the whole analysis depends on
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Fail fast with an explicit message if the csv is missing any required
# column (the original defined required_vars but never validated them)
missing_vars <- setdiff(required_vars, names(data))
if (length(missing_vars) > 0) {
  stop("exp1.csv is missing required columns: ", paste(missing_vars, collapse = ", "))
}

# Define domain mapping: variable name -> (time, domain), used when reshaping
domain_mapping <- data.frame(
  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)
|
||||
```
|
||||
|
||||
```{r data-reshaping}
|
||||
# Reshape wide -> long: one row per participant x TIME x DOMAIN, with the
# domain-mean value in MEAN_DIFFERENCE. Rows with a missing response are
# dropped at the end.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(domain_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))

# Create clean dataset for analysis (fixing the reference issue);
# long_data_clean is the name all downstream chunks use
long_data_clean <- long_data
|
||||
```
|
||||
|
||||
# Descriptive Statistics
|
||||
|
||||
## Overall Descriptive Statistics by TIME and DOMAIN
|
||||
|
||||
```{r descriptive-stats}
|
||||
# Per-cell descriptive statistics for MEAN_DIFFERENCE by TIME x DOMAIN,
# all rounded to 5 decimal places
round5 <- function(x) round(x, 5)

desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round5(mean(MEAN_DIFFERENCE, na.rm = TRUE)),
    variance = round5(var(MEAN_DIFFERENCE, na.rm = TRUE)),
    sd = round5(sd(MEAN_DIFFERENCE, na.rm = TRUE)),
    median = round5(median(MEAN_DIFFERENCE, na.rm = TRUE)),
    q1 = round5(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE)),
    q3 = round5(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE)),
    min = round5(min(MEAN_DIFFERENCE, na.rm = TRUE)),
    max = round5(max(MEAN_DIFFERENCE, na.rm = TRUE)),
    .groups = 'drop'
  )

print(desc_stats)
|
||||
```
|
||||
|
||||
## Descriptive Statistics by Between-Subjects Factors
|
||||
|
||||
```{r descriptive-stats-temporal}
|
||||
# Cell descriptives additionally split by the between-subjects factor
# TEMPORAL_DO (n, mean, variance, sd per TEMPORAL_DO x TIME x DOMAIN cell)
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )

print(desc_stats_by_temporal)
|
||||
```
|
||||
|
||||
# Assumption Testing
|
||||
|
||||
## Missing Values Check
|
||||
|
||||
```{r missing-values}
|
||||
# NOTE(review): long_data was built with filter(!is.na(MEAN_DIFFERENCE)),
# so counting NAs in it always yields zero — the original check was
# vacuous. Missingness must be assessed on the wide data before reshaping.
missing_summary <- data %>%
  summarise(across(all_of(required_vars), ~ sum(is.na(.x)))) %>%
  pivot_longer(everything(), names_to = "variable", values_to = "n_missing") %>%
  left_join(domain_mapping, by = "variable") %>%
  mutate(
    n_total = nrow(data),
    pct_missing = round(100 * n_missing / n_total, 2)
  ) %>%
  select(time, domain, n_total, n_missing, pct_missing)

print(missing_summary)
|
||||
```
|
||||
|
||||
## Outlier Detection
|
||||
|
||||
```{r outlier-detection}
|
||||
# Tukey's-fences outlier count per TIME x DOMAIN cell.
# Inside summarise(), later expressions may reference columns computed
# earlier in the same call (iqr uses q1/q3; the bounds use iqr; n_outliers
# uses the bounds).
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    median = median(MEAN_DIFFERENCE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,  # Tukey lower fence
    upper_bound = q3 + 1.5 * iqr,  # Tukey upper fence
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )

print(outlier_summary)
|
||||
```
|
||||
|
||||
## Anderson-Darling Normality Test
|
||||
|
||||
```{r normality-test}
|
||||
# Anderson-Darling normality test per TIME x DOMAIN cell.
# The test is run ONCE per cell and stored in a list-column; the original
# called ad.test() twice per cell (once for the statistic, once for the
# p-value).
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad = list(ad.test(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(
    ad_statistic = map_dbl(ad, ~ unname(.x$statistic)),
    ad_p_value = map_dbl(ad, ~ .x$p.value)
  ) %>%
  select(TIME, DOMAIN, n, ad_statistic, ad_p_value)

print(normality_results)
|
||||
```
|
||||
|
||||
## Homogeneity of Variance (Levene's Test)
|
||||
|
||||
### Test homogeneity across TIME within each DOMAIN
|
||||
|
||||
```{r homogeneity-time}
|
||||
# Levene's test for homogeneity of variance across TIME, run separately
# within each DOMAIN. The test object is computed ONCE per group and
# unpacked afterwards (the original called leveneTest() twice per group,
# duplicating the work).
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ TIME)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(DOMAIN, levene_F, levene_p)

print(homogeneity_time)
|
||||
```
|
||||
|
||||
### Test homogeneity across DOMAIN within each TIME
|
||||
|
||||
```{r homogeneity-domain}
|
||||
# Levene's test for homogeneity of variance across DOMAIN, run separately
# within each TIME. As above, leveneTest() is computed once per group
# instead of twice.
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene = list(leveneTest(MEAN_DIFFERENCE ~ DOMAIN)),
    .groups = 'drop'
  ) %>%
  mutate(
    levene_F = map_dbl(levene, ~ .x$`F value`[1]),
    levene_p = map_dbl(levene, ~ .x$`Pr(>F)`[1])
  ) %>%
  select(TIME, levene_F, levene_p)

print(homogeneity_domain)
|
||||
```
|
||||
|
||||
## Hartley's F-Max Test with Bootstrap Critical Values
|
||||
|
||||
```{r hartley-function}
|
||||
# Hartley's F-max statistic: ratio of the largest to the smallest variance
# in the supplied vector (NAs ignored)
calculate_hartley_ratio <- function(variances) {
  rng <- range(variances, na.rm = TRUE)
  rng[2] / rng[1]
}
|
||||
|
||||
# More efficient bootstrap function for Hartley's F-max test.
#
# Args:
#   data:         data.frame (or tibble) with the grouping and response columns
#   group_var:    name of the grouping column (character)
#   response_var: name of the numeric response column (character)
#   n_iter:       number of bootstrap iterations (default 1000)
#
# Returns a list with the observed F-max ratio, the bootstrap 95th-percentile
# critical value, and the number of valid bootstrap iterations.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get unique groups
  groups <- unique(data[[group_var]])

  # Per-group response vectors with NAs dropped. [[ ]]-indexing works for
  # both data.frames and tibbles; the original
  #   data[data[[group_var]] == .x, response_var]
  # returns a 1-column tibble (not a vector) when `data` is a tibble — as
  # it is when called via pick() — which breaks length()/sample() below.
  group_data_list <- lapply(groups, function(g) {
    vals <- data[[response_var]][data[[group_var]] == g]
    vals[!is.na(vals)]
  })

  # Observed variance for each group
  observed_vars <- vapply(group_data_list, var, numeric(1))

  # Handle invalid variances (zero/negative/NA) so the ratio stays finite
  bad <- observed_vars <= 0 | is.na(observed_vars)
  observed_vars[bad] <- 1e-10

  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)

  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)

  # Bootstrap: resample each group independently with replacement and
  # recompute the variance ratio
  for (i in seq_len(n_iter)) {
    sample_vars <- vapply(group_data_list, function(vals) {
      var(sample(vals, size = length(vals), replace = TRUE))
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }

  # Keep only finite, non-NA ratios
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]

  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }

  # Critical value: 95th percentile of the bootstrap distribution.
  # unname() drops the "95%" label that quantile() attaches.
  critical_95 <- unname(quantile(valid_ratios, 0.95, na.rm = TRUE))

  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
|
||||
```
|
||||
|
||||
```{r hartley-results}
|
||||
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)

# Sanity check: levels and cell counts of the between-subjects factor
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))

# Observed variance ratio per TIME x DOMAIN cell, comparing the two
# TEMPORAL_DO groups ("01PAST" vs "02FUT")
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)

print(observed_temporal_ratios)

# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility of the bootstrap resampling

# pick() hands each group's TEMPORAL_DO / MEAN_DIFFERENCE columns (as a
# tibble) to the bootstrap function; the result is kept in a list-column
# and unpacked below
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # TRUE when variances differ more than expected under the bootstrap null
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, DOMAIN, observed_ratio, critical_95, significant)

print(hartley_temporal_results)
|
||||
```
|
||||
|
||||
# Mixed ANOVA Analysis
|
||||
|
||||
## Design Balance Check
|
||||
|
||||
```{r design-check}
|
||||
# Check for complete cases (rows with no NA in any column)
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)

# Check if design is balanced.
# Each participant should contribute exactly ONE observation to every
# TIME x DOMAIN cell; a count of 0 means a participant is missing a cell,
# which already makes the design unbalanced. The original condition
# (design_balance %in% c(0, 1)) accepted counts of 0 and therefore could
# print the "balanced" message for an unbalanced design.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance == 1)) {
  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
  print("Warning: Design is unbalanced")
  print(summary(as.vector(design_balance)))
}
|
||||
```
|
||||
|
||||
## Mixed ANOVA with Sphericity Corrections
|
||||
|
||||
```{r mixed-anova}
|
||||
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 requests Type III sums of squares (contr.sum is set globally in
# the libraries chunk, which Type III requires); detailed = TRUE requests
# the expanded output table.
mixed_anova_model <- ezANOVA(data = long_data_clean,
                             dv = MEAN_DIFFERENCE,
                             wid = pID,
                             between = TEMPORAL_DO,
                             within = .(TIME, DOMAIN),
                             type = 3,
                             detailed = TRUE)

# The ANOVA table itself; sibling list elements ($Mauchly,
# $`Sphericity Corrections`) are consumed by later chunks
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
|
||||
```
|
||||
|
||||
## Mauchly's Test for Sphericity
|
||||
|
||||
```{r mauchly-test}
|
||||
print(mixed_anova_model$Mauchly)
|
||||
```
|
||||
|
||||
## Sphericity-Corrected Results
|
||||
|
||||
```{r sphericity-corrections}
|
||||
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
|
||||
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
|
||||
print(mixed_anova_model$`Sphericity Corrections`)
|
||||
|
||||
# Extract and display corrected degrees of freedom
|
||||
sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
|
||||
anova_table <- mixed_anova_model$ANOVA
|
||||
|
||||
corrected_df <- data.frame(
|
||||
Effect = sphericity_corr$Effect,
|
||||
Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
|
||||
GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
|
||||
HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
|
||||
GG_epsilon = sphericity_corr$GGe,
|
||||
HF_epsilon = sphericity_corr$HFe
|
||||
)
|
||||
|
||||
print(corrected_df)
|
||||
|
||||
# Between-subjects effects (no sphericity corrections needed)
|
||||
between_effects <- c("TEMPORAL_DO")
|
||||
for(effect in between_effects) {
|
||||
if(effect %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == effect]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == effect]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == effect]
|
||||
p_value <- anova_table$p[anova_table$Effect == effect]
|
||||
|
||||
cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
|
||||
}
|
||||
}
|
||||
|
||||
# Within-subjects effects (sphericity corrections where applicable)
|
||||
|
||||
# TIME main effect (2 levels, sphericity automatically satisfied)
|
||||
if("TIME" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "TIME"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "TIME"]
|
||||
cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# DOMAIN main effect (4 levels, needs sphericity correction)
|
||||
if("DOMAIN" %in% anova_table$Effect) {
|
||||
f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
|
||||
dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
|
||||
dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
|
||||
p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
|
||||
cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
|
||||
}
|
||||
|
||||
# Interactions with sphericity corrections
|
||||
for(i in seq_len(nrow(corrected_df))) {
|
||||
effect <- corrected_df$Effect[i]
|
||||
f_value <- anova_table$F[match(effect, anova_table$Effect)]
|
||||
|
||||
cat(sprintf("\n%s:\n", effect))
|
||||
cat(sprintf(" Original: F(%d, %d) = %.3f\n",
|
||||
corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
|
||||
cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
|
||||
cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
|
||||
corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
|
||||
}
|
||||
} else {
|
||||
print("\nNote: Sphericity corrections not needed (sphericity assumption met)")
|
||||
}
|
||||
```
|
||||
|
||||
# Effect Sizes (Cohen's d)
|
||||
|
||||
## Main Effects
|
||||
|
||||
```{r cohens-d-main}
|
||||
# Create aov model for emmeans
|
||||
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
|
||||
data = long_data_clean)
|
||||
|
||||
# Main Effect of TIME
|
||||
time_emmeans <- emmeans(aov_model, ~ TIME)
|
||||
print(time_emmeans)
|
||||
|
||||
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
|
||||
time_main_df <- as.data.frame(time_main_contrast)
|
||||
print(time_main_df)
|
||||
|
||||
# Calculate Cohen's d for TIME main effect
|
||||
if(nrow(time_main_df) > 0) {
|
||||
cat("\nCohen's d for TIME main effect:\n")
|
||||
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
|
||||
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
|
||||
|
||||
time_cohens_d <- cohen.d(time_past_data, time_future_data)
|
||||
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
|
||||
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
|
||||
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
|
||||
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
|
||||
}
|
||||
```
|
||||
|
||||
```{r cohens-d-domain}
|
||||
# Main Effect of DOMAIN (significant: p < 0.001)
|
||||
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
|
||||
print(domain_emmeans)
|
||||
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
|
||||
domain_main_df <- as.data.frame(domain_main_contrast)
|
||||
print(domain_main_df)
|
||||
|
||||
# Calculate Cohen's d for significant DOMAIN contrasts
|
||||
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
|
||||
if(nrow(significant_domain) > 0) {
|
||||
cat("\nCohen's d for significant DOMAIN contrasts:\n")
|
||||
for(i in seq_len(nrow(significant_domain))) {
|
||||
contrast_name <- as.character(significant_domain$contrast[i])
|
||||
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
|
||||
if(length(contrast_parts) == 2) {
|
||||
level1 <- trimws(contrast_parts[1])
|
||||
level2 <- trimws(contrast_parts[2])
|
||||
|
||||
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
|
||||
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
|
||||
|
||||
if(length(data1) > 0 && length(data2) > 0) {
|
||||
domain_cohens_d <- cohen.d(data1, data2)
|
||||
cat(sprintf("Comparison: %s\n", contrast_name))
|
||||
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
|
||||
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
|
||||
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
|
||||
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
|
||||
cat("\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Two-Way Interactions
|
||||
|
||||
```{r cohens-d-function}
|
||||
# Function to calculate Cohen's d for pairwise comparisons
|
||||
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
|
||||
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
|
||||
|
||||
if(nrow(significant_pairs) > 0) {
|
||||
cat("Significant pairwise comparisons (p < 0.05):\n")
|
||||
print(significant_pairs)
|
||||
|
||||
cat("\nCohen's d calculated from raw data:\n")
|
||||
|
||||
for(i in seq_len(nrow(significant_pairs))) {
|
||||
comparison <- significant_pairs[i, ]
|
||||
contrast_name <- as.character(comparison$contrast)
|
||||
|
||||
# Parse the contrast
|
||||
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
|
||||
if(length(contrast_parts) == 2) {
|
||||
level1 <- trimws(contrast_parts[1])
|
||||
level2 <- trimws(contrast_parts[2])
|
||||
|
||||
# Get raw data for both conditions
|
||||
if(group2_var %in% colnames(comparison)) {
|
||||
group2_level <- as.character(comparison[[group2_var]])
|
||||
data1 <- data[[response_var]][
|
||||
data[[group1_var]] == level1 &
|
||||
data[[group2_var]] == group2_level]
|
||||
|
||||
data2 <- data[[response_var]][
|
||||
data[[group1_var]] == level2 &
|
||||
data[[group2_var]] == group2_level]
|
||||
} else {
|
||||
data1 <- data[[response_var]][data[[group1_var]] == level1]
|
||||
data2 <- data[[response_var]][data[[group1_var]] == level2]
|
||||
}
|
||||
|
||||
if(length(data1) > 0 && length(data2) > 0) {
|
||||
# Calculate Cohen's d using effsize package
|
||||
cohens_d_result <- cohen.d(data1, data2)
|
||||
|
||||
cat(sprintf("Comparison: %s", contrast_name))
|
||||
if(group2_var %in% colnames(comparison)) {
|
||||
cat(sprintf(" | %s", group2_level))
|
||||
}
|
||||
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
|
||||
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
|
||||
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
|
||||
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
|
||||
cat("\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
cat("No significant pairwise comparisons found.\n")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
```{r interaction-effects}
|
||||
# Note: These sections would need the actual simple effects results from your analysis
|
||||
# The original script references undefined variables: temporal_time_simple and time_domain_simple
|
||||
# These would need to be calculated using emmeans for simple effects
|
||||
|
||||
# 1. TEMPORAL_DO × TIME INTERACTION
|
||||
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
|
||||
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
|
||||
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
|
||||
|
||||
# 2. TIME × DOMAIN INTERACTION
|
||||
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
|
||||
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
|
||||
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
|
||||
```
|
||||
|
||||
# Interaction Plot
|
||||
|
||||
```{r interaction-plot}
|
||||
# Define color palette for DOMAIN (4 levels)
|
||||
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
|
||||
|
||||
# Define TIME levels (Past, Future order)
|
||||
time_levels <- c("Past", "Future")
|
||||
|
||||
# Create estimated marginal means for DOMAIN x TIME
|
||||
emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)
|
||||
|
||||
# Prepare emmeans data frame
|
||||
emmeans_data2 <- emm_full %>%
|
||||
as.data.frame() %>%
|
||||
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
|
||||
rename(
|
||||
ci_lower = lower.CL,
|
||||
ci_upper = upper.CL,
|
||||
plot_mean = emmean
|
||||
) %>%
|
||||
mutate(
|
||||
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
TIME = factor(TIME, levels = time_levels)
|
||||
)
|
||||
|
||||
iPlot <- long_data_clean %>%
|
||||
dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
|
||||
mutate(
|
||||
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
|
||||
TIME = factor(TIME, levels = time_levels)
|
||||
)
|
||||
|
||||
# Plot without TEMPORAL_DO facet
|
||||
interaction_plot2 <- ggplot() +
|
||||
geom_point(
|
||||
data = iPlot,
|
||||
aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
|
||||
position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
|
||||
alpha = 0.3, shape = 16
|
||||
) +
|
||||
geom_rect(
|
||||
data = emmeans_data2,
|
||||
aes(
|
||||
xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
ymin = ci_lower, ymax = ci_upper,
|
||||
fill = DOMAIN
|
||||
),
|
||||
color = "black", alpha = 0.5
|
||||
) +
|
||||
geom_segment(
|
||||
data = emmeans_data2,
|
||||
aes(
|
||||
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
y = ci_lower, yend = ci_upper
|
||||
),
|
||||
color = "black"
|
||||
) +
|
||||
geom_point(
|
||||
data = emmeans_data2,
|
||||
aes(
|
||||
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
|
||||
y = plot_mean,
|
||||
color = DOMAIN,
|
||||
shape = DOMAIN
|
||||
),
|
||||
size = 2.5, stroke = 0.8, fill = "black"
|
||||
) +
|
||||
labs(
|
||||
x = "TIME", y = "Mean Difference",
|
||||
title = "DOMAIN × TIME Interaction", subtitle = ""
|
||||
) +
|
||||
scale_color_manual(name = "DOMAIN", values = cbp1) +
|
||||
scale_fill_manual(name = "DOMAIN", values = cbp1) +
|
||||
scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
|
||||
theme_minimal() +
|
||||
theme(
|
||||
axis.text.x = element_text(angle = 0, hjust = 0.5),
|
||||
plot.title = element_text(size = 14, hjust = 0.5),
|
||||
plot.subtitle = element_text(size = 12, hjust = 0.5)
|
||||
)
|
||||
|
||||
print(interaction_plot2)
|
||||
```
|
||||
|
||||
0
.history/eohi1/Untitled-1_20251020173338.r
Normal file
0
.history/eohi1/Untitled-1_20251020173338.r
Normal file
15
.history/eohi1/Untitled-1_20251020173353.r
Normal file
15
.history/eohi1/Untitled-1_20251020173353.r
Normal file
@ -0,0 +1,15 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
library(car)
|
||||
library(lmtest)
|
||||
library(stargazer)
|
||||
library(sandwich)
|
||||
library(lmtest)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_edu)
|
||||
@ -0,0 +1,162 @@
|
||||
# Assumption Checks Before Cronbach's Alpha Analysis
|
||||
# Run this BEFORE the main reliability analysis
|
||||
|
||||
library(psych)
|
||||
library(corrplot)
|
||||
library(ggplot2)
|
||||
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
# Define scale variables
|
||||
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
|
||||
"NPastDiff_pref_nap", "NPastDiff_pref_travel")
|
||||
|
||||
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
|
||||
"NPastDiff_pers_anxious", "NPastDiff_pers_complex")
|
||||
|
||||
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
|
||||
"NPastDiff_val_performance", "NPastDiff_val_justice")
|
||||
|
||||
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
|
||||
"NPastDiff_life_important", "NPastDiff_life_change")
|
||||
|
||||
# Function to check assumptions for a scale
|
||||
check_assumptions <- function(data, var_names, scale_name) {
|
||||
cat("\n", "="*60, "\n")
|
||||
cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
|
||||
cat("="*60, "\n")
|
||||
|
||||
# Get scale data
|
||||
scale_data <- data[, var_names]
|
||||
|
||||
# 1. Sample size check
|
||||
complete_cases <- sum(complete.cases(scale_data))
|
||||
cat("1. SAMPLE SIZE CHECK:\n")
|
||||
cat(" Total participants:", nrow(data), "\n")
|
||||
cat(" Complete cases:", complete_cases, "\n")
|
||||
cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
if(complete_cases < 30) {
|
||||
cat(" WARNING: Sample size too small for reliable alpha estimates\n")
|
||||
return(FALSE)
|
||||
}
|
||||
|
||||
# 2. Missing data check
|
||||
cat("\n2. MISSING DATA CHECK:\n")
|
||||
missing_counts <- colSums(is.na(scale_data))
|
||||
missing_pct <- round(missing_counts / nrow(data) * 100, 2)
|
||||
cat(" Missing data by item:\n")
|
||||
for(i in 1:length(var_names)) {
|
||||
cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
|
||||
}
|
||||
|
||||
max_missing <- max(missing_pct)
|
||||
cat(" Maximum missing:", max_missing, "%\n")
|
||||
cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
# 3. Use only complete cases for remaining checks
|
||||
complete_data <- scale_data[complete.cases(scale_data), ]
|
||||
|
||||
# 4. Normality check (Shapiro-Wilk test on first item as example)
|
||||
cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
|
||||
if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
|
||||
shapiro_result <- shapiro.test(complete_data[, 1])
|
||||
cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
|
||||
cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
|
||||
} else {
|
||||
cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
|
||||
}
|
||||
|
||||
# 5. Inter-item correlations check
|
||||
cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
|
||||
cor_matrix <- cor(complete_data)
|
||||
|
||||
# Get off-diagonal correlations
|
||||
cor_matrix[lower.tri(cor_matrix)] <- NA
|
||||
diag(cor_matrix) <- NA
|
||||
cors <- as.vector(cor_matrix)
|
||||
cors <- cors[!is.na(cors)]
|
||||
|
||||
positive_cors <- sum(cors > 0)
|
||||
strong_cors <- sum(cors > 0.30)
|
||||
negative_cors <- sum(cors < 0)
|
||||
|
||||
cat(" Total correlations:", length(cors), "\n")
|
||||
cat(" Positive correlations:", positive_cors, "\n")
|
||||
cat(" Strong correlations (>0.30):", strong_cors, "\n")
|
||||
cat(" Negative correlations:", negative_cors, "\n")
|
||||
cat(" Mean correlation:", round(mean(cors), 4), "\n")
|
||||
cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
|
||||
|
||||
if(negative_cors > 0) {
|
||||
cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
|
||||
}
|
||||
if(strong_cors / length(cors) < 0.5) {
|
||||
cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
|
||||
}
|
||||
|
||||
# 6. Item variance check
|
||||
cat("\n5. ITEM VARIANCE CHECK:\n")
|
||||
item_vars <- apply(complete_data, 2, var)
|
||||
var_ratio <- max(item_vars) / min(item_vars)
|
||||
cat(" Item variances:", round(item_vars, 4), "\n")
|
||||
cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
|
||||
cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
# 7. Outlier check
|
||||
cat("\n6. OUTLIER CHECK:\n")
|
||||
# Check for multivariate outliers using Mahalanobis distance
|
||||
if(nrow(complete_data) > ncol(complete_data)) {
|
||||
mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
|
||||
outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
|
||||
outliers <- sum(mahal_dist > outlier_threshold)
|
||||
cat(" Multivariate outliers (p<0.001):", outliers, "\n")
|
||||
cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
|
||||
}
|
||||
|
||||
# 8. Summary recommendation
|
||||
cat("\n7. OVERALL RECOMMENDATION:\n")
|
||||
issues <- 0
|
||||
if(complete_cases < 30) issues <- issues + 1
|
||||
if(max_missing >= 20) issues <- issues + 1
|
||||
if(negative_cors > 0) issues <- issues + 1
|
||||
if(var_ratio >= 4) issues <- issues + 1
|
||||
|
||||
if(issues == 0) {
|
||||
cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
|
||||
} else if(issues <= 2) {
|
||||
cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
|
||||
} else {
|
||||
cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
|
||||
}
|
||||
|
||||
return(TRUE)
|
||||
}
|
||||
|
||||
# Check assumptions for all past scales
|
||||
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
|
||||
cat("\nData: exp1.csv")
|
||||
cat("\nTotal sample size:", nrow(data))
|
||||
|
||||
check_assumptions(data, past_pref_vars, "Past Preferences")
|
||||
check_assumptions(data, past_pers_vars, "Past Personality")
|
||||
check_assumptions(data, past_val_vars, "Past Values")
|
||||
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
|
||||
|
||||
# Quick check of future scales (you can expand this)
|
||||
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
|
||||
"NFutDiff_pref_nap", "NFutDiff_pref_travel")
|
||||
|
||||
check_assumptions(data, fut_pref_vars, "Future Preferences")
|
||||
|
||||
cat("\n", "="*60, "\n")
|
||||
cat("GENERAL GUIDELINES:\n")
|
||||
cat("="*60, "\n")
|
||||
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
|
||||
cat("⚠️ If some assumptions are violated, interpret with caution\n")
|
||||
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
|
||||
cat(" - Omega coefficient (more robust to violations)\n")
|
||||
cat(" - Split-half reliability\n")
|
||||
cat(" - Test-retest reliability\n")
|
||||
cat(" - Factor analysis to check dimensionality\n")
|
||||
@ -0,0 +1,162 @@
|
||||
# Assumption Checks Before Cronbach's Alpha Analysis
|
||||
# Run this BEFORE the main reliability analysis
|
||||
|
||||
library(psych)
|
||||
library(corrplot)
|
||||
library(ggplot2)
|
||||
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
# Define scale variables
|
||||
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
|
||||
"NPastDiff_pref_nap", "NPastDiff_pref_travel")
|
||||
|
||||
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
|
||||
"NPastDiff_pers_anxious", "NPastDiff_pers_complex")
|
||||
|
||||
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
|
||||
"NPastDiff_val_performance", "NPastDiff_val_justice")
|
||||
|
||||
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
|
||||
"NPastDiff_life_important", "NPastDiff_life_change")
|
||||
|
||||
# Function to check assumptions for a scale
|
||||
check_assumptions <- function(data, var_names, scale_name) {
|
||||
cat("\n", "="*60, "\n")
|
||||
cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
|
||||
cat("="*60, "\n")
|
||||
|
||||
# Get scale data
|
||||
scale_data <- data[, var_names]
|
||||
|
||||
# 1. Sample size check
|
||||
complete_cases <- sum(complete.cases(scale_data))
|
||||
cat("1. SAMPLE SIZE CHECK:\n")
|
||||
cat(" Total participants:", nrow(data), "\n")
|
||||
cat(" Complete cases:", complete_cases, "\n")
|
||||
cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
if(complete_cases < 30) {
|
||||
cat(" WARNING: Sample size too small for reliable alpha estimates\n")
|
||||
return(FALSE)
|
||||
}
|
||||
|
||||
# 2. Missing data check
|
||||
cat("\n2. MISSING DATA CHECK:\n")
|
||||
missing_counts <- colSums(is.na(scale_data))
|
||||
missing_pct <- round(missing_counts / nrow(data) * 100, 2)
|
||||
cat(" Missing data by item:\n")
|
||||
for(i in 1:length(var_names)) {
|
||||
cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
|
||||
}
|
||||
|
||||
max_missing <- max(missing_pct)
|
||||
cat(" Maximum missing:", max_missing, "%\n")
|
||||
cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
# 3. Use only complete cases for remaining checks
|
||||
complete_data <- scale_data[complete.cases(scale_data), ]
|
||||
|
||||
# 4. Normality check (Shapiro-Wilk test on first item as example)
|
||||
cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
|
||||
if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
|
||||
shapiro_result <- shapiro.test(complete_data[, 1])
|
||||
cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
|
||||
cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
|
||||
} else {
|
||||
cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
|
||||
}
|
||||
|
||||
# 5. Inter-item correlations check
|
||||
cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
|
||||
cor_matrix <- cor(complete_data)
|
||||
|
||||
# Get off-diagonal correlations
|
||||
cor_matrix[lower.tri(cor_matrix)] <- NA
|
||||
diag(cor_matrix) <- NA
|
||||
cors <- as.vector(cor_matrix)
|
||||
cors <- cors[!is.na(cors)]
|
||||
|
||||
positive_cors <- sum(cors > 0)
|
||||
strong_cors <- sum(cors > 0.30)
|
||||
negative_cors <- sum(cors < 0)
|
||||
|
||||
cat(" Total correlations:", length(cors), "\n")
|
||||
cat(" Positive correlations:", positive_cors, "\n")
|
||||
cat(" Strong correlations (>0.30):", strong_cors, "\n")
|
||||
cat(" Negative correlations:", negative_cors, "\n")
|
||||
cat(" Mean correlation:", round(mean(cors), 4), "\n")
|
||||
cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
|
||||
|
||||
if(negative_cors > 0) {
|
||||
cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
|
||||
}
|
||||
if(strong_cors / length(cors) < 0.5) {
|
||||
cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
|
||||
}
|
||||
|
||||
# 6. Item variance check
|
||||
cat("\n5. ITEM VARIANCE CHECK:\n")
|
||||
item_vars <- apply(complete_data, 2, var)
|
||||
var_ratio <- max(item_vars) / min(item_vars)
|
||||
cat(" Item variances:", round(item_vars, 4), "\n")
|
||||
cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
|
||||
cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
# 7. Outlier check
|
||||
cat("\n6. OUTLIER CHECK:\n")
|
||||
# Check for multivariate outliers using Mahalanobis distance
|
||||
if(nrow(complete_data) > ncol(complete_data)) {
|
||||
mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
|
||||
outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
|
||||
outliers <- sum(mahal_dist > outlier_threshold)
|
||||
cat(" Multivariate outliers (p<0.001):", outliers, "\n")
|
||||
cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
|
||||
}
|
||||
|
||||
# 8. Summary recommendation
|
||||
cat("\n7. OVERALL RECOMMENDATION:\n")
|
||||
issues <- 0
|
||||
if(complete_cases < 30) issues <- issues + 1
|
||||
if(max_missing >= 20) issues <- issues + 1
|
||||
if(negative_cors > 0) issues <- issues + 1
|
||||
if(var_ratio >= 4) issues <- issues + 1
|
||||
|
||||
if(issues == 0) {
|
||||
cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
|
||||
} else if(issues <= 2) {
|
||||
cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
|
||||
} else {
|
||||
cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
|
||||
}
|
||||
|
||||
return(TRUE)
|
||||
}
|
||||
|
||||
# Check assumptions for all past scales
|
||||
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
|
||||
cat("\nData: exp1.csv")
|
||||
cat("\nTotal sample size:", nrow(data))
|
||||
|
||||
check_assumptions(data, past_pref_vars, "Past Preferences")
|
||||
check_assumptions(data, past_pers_vars, "Past Personality")
|
||||
check_assumptions(data, past_val_vars, "Past Values")
|
||||
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
|
||||
|
||||
# Quick check of future scales (you can expand this)
|
||||
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
|
||||
"NFutDiff_pref_nap", "NFutDiff_pref_travel")
|
||||
|
||||
check_assumptions(data, fut_pref_vars, "Future Preferences")
|
||||
|
||||
cat("\n", "="*60, "\n")
|
||||
cat("GENERAL GUIDELINES:\n")
|
||||
cat("="*60, "\n")
|
||||
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
|
||||
cat("⚠️ If some assumptions are violated, interpret with caution\n")
|
||||
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
|
||||
cat(" - Omega coefficient (more robust to violations)\n")
|
||||
cat(" - Split-half reliability\n")
|
||||
cat(" - Test-retest reliability\n")
|
||||
cat(" - Factor analysis to check dimensionality\n")
|
||||
@ -0,0 +1,164 @@
|
||||
# Assumption Checks Before Cronbach's Alpha Analysis
|
||||
# Run this BEFORE the main reliability analysis
|
||||
|
||||
library(psych)
|
||||
library(corrplot)
|
||||
library(ggplot2)
|
||||
|
||||
# Read the data
|
||||
data <- read.csv("exp1.csv")
|
||||
|
||||
# Define scale variables
|
||||
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
|
||||
"NPastDiff_pref_nap", "NPastDiff_pref_travel")
|
||||
|
||||
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
|
||||
"NPastDiff_pers_anxious", "NPastDiff_pers_complex")
|
||||
|
||||
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
|
||||
"NPastDiff_val_performance", "NPastDiff_val_justice")
|
||||
|
||||
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
|
||||
"NPastDiff_life_important", "NPastDiff_life_change")
|
||||
|
||||
# Function to check assumptions for a scale
|
||||
check_assumptions <- function(data, var_names, scale_name) {
|
||||
cat("\n", "="*60, "\n")
|
||||
cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
|
||||
cat("="*60, "\n")
|
||||
|
||||
# Get scale data
|
||||
scale_data <- data[, var_names]
|
||||
|
||||
# 1. Sample size check
|
||||
complete_cases <- sum(complete.cases(scale_data))
|
||||
cat("1. SAMPLE SIZE CHECK:\n")
|
||||
cat(" Total participants:", nrow(data), "\n")
|
||||
cat(" Complete cases:", complete_cases, "\n")
|
||||
cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
if(complete_cases < 30) {
|
||||
cat(" WARNING: Sample size too small for reliable alpha estimates\n")
|
||||
return(FALSE)
|
||||
}
|
||||
|
||||
# 2. Missing data check
|
||||
cat("\n2. MISSING DATA CHECK:\n")
|
||||
missing_counts <- colSums(is.na(scale_data))
|
||||
missing_pct <- round(missing_counts / nrow(data) * 100, 2)
|
||||
cat(" Missing data by item:\n")
|
||||
for(i in 1:length(var_names)) {
|
||||
cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
|
||||
}
|
||||
|
||||
max_missing <- max(missing_pct)
|
||||
cat(" Maximum missing:", max_missing, "%\n")
|
||||
cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
# 3. Use only complete cases for remaining checks
|
||||
complete_data <- scale_data[complete.cases(scale_data), ]
|
||||
|
||||
# 4. Normality check (Shapiro-Wilk test on first item as example)
|
||||
cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
|
||||
if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
|
||||
shapiro_result <- shapiro.test(complete_data[, 1])
|
||||
cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
|
||||
cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
|
||||
} else {
|
||||
cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
|
||||
}
|
||||
|
||||
# 5. Inter-item correlations check
|
||||
cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
|
||||
cor_matrix <- cor(complete_data)
|
||||
|
||||
# Get off-diagonal correlations
|
||||
cor_matrix[lower.tri(cor_matrix)] <- NA
|
||||
diag(cor_matrix) <- NA
|
||||
cors <- as.vector(cor_matrix)
|
||||
cors <- cors[!is.na(cors)]
|
||||
|
||||
positive_cors <- sum(cors > 0)
|
||||
strong_cors <- sum(cors > 0.30)
|
||||
negative_cors <- sum(cors < 0)
|
||||
|
||||
cat(" Total correlations:", length(cors), "\n")
|
||||
cat(" Positive correlations:", positive_cors, "\n")
|
||||
cat(" Strong correlations (>0.30):", strong_cors, "\n")
|
||||
cat(" Negative correlations:", negative_cors, "\n")
|
||||
cat(" Mean correlation:", round(mean(cors), 4), "\n")
|
||||
cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
|
||||
|
||||
if(negative_cors > 0) {
|
||||
cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
|
||||
}
|
||||
if(strong_cors / length(cors) < 0.5) {
|
||||
cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
|
||||
}
|
||||
|
||||
# 6. Item variance check
|
||||
cat("\n5. ITEM VARIANCE CHECK:\n")
|
||||
item_vars <- apply(complete_data, 2, var)
|
||||
var_ratio <- max(item_vars) / min(item_vars)
|
||||
cat(" Item variances:", round(item_vars, 4), "\n")
|
||||
cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
|
||||
cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
|
||||
|
||||
# 7. Outlier check
|
||||
cat("\n6. OUTLIER CHECK:\n")
|
||||
# Check for multivariate outliers using Mahalanobis distance
|
||||
if(nrow(complete_data) > ncol(complete_data)) {
|
||||
mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
|
||||
outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
|
||||
outliers <- sum(mahal_dist > outlier_threshold)
|
||||
cat(" Multivariate outliers (p<0.001):", outliers, "\n")
|
||||
cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
|
||||
}
|
||||
|
||||
# 8. Summary recommendation
|
||||
cat("\n7. OVERALL RECOMMENDATION:\n")
|
||||
issues <- 0
|
||||
if(complete_cases < 30) issues <- issues + 1
|
||||
if(max_missing >= 20) issues <- issues + 1
|
||||
if(negative_cors > 0) issues <- issues + 1
|
||||
if(var_ratio >= 4) issues <- issues + 1
|
||||
|
||||
if(issues == 0) {
|
||||
cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
|
||||
} else if(issues <= 2) {
|
||||
cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
|
||||
} else {
|
||||
cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
|
||||
}
|
||||
|
||||
return(TRUE)
|
||||
}
|
||||
|
||||
# Check assumptions for all past scales
|
||||
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
|
||||
cat("\nData: exp1.csv")
|
||||
cat("\nTotal sample size:", nrow(data))
|
||||
|
||||
check_assumptions(data, past_pref_vars, "Past Preferences")
|
||||
check_assumptions(data, past_pers_vars, "Past Personality")
|
||||
check_assumptions(data, past_val_vars, "Past Values")
|
||||
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
|
||||
|
||||
# Quick check of future scales (you can expand this)
|
||||
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
|
||||
"NFutDiff_pref_nap", "NFutDiff_pref_travel")
|
||||
|
||||
check_assumptions(data, fut_pref_vars, "Future Preferences")
|
||||
|
||||
cat("\n", "="*60, "\n")
|
||||
cat("GENERAL GUIDELINES:\n")
|
||||
cat("="*60, "\n")
|
||||
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
|
||||
cat("⚠️ If some assumptions are violated, interpret with caution\n")
|
||||
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
|
||||
cat(" - Omega coefficient (more robust to violations)\n")
|
||||
cat(" - Split-half reliability\n")
|
||||
cat(" - Test-retest reliability\n")
|
||||
cat(" - Factor analysis to check dimensionality\n")
|
||||
|
||||
|
||||
0
.history/eohi1/brierVARS_20250922124859.vb
Normal file
0
.history/eohi1/brierVARS_20250922124859.vb
Normal file
82
.history/eohi1/brierVARS_20250922124900.vb
Normal file
82
.history/eohi1/brierVARS_20250922124900.vb
Normal file
@ -0,0 +1,82 @@
|
||||
Function GetCol(headerName As String, ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
GetCol = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
If IsError(GetCol) Then GetCol = 0
|
||||
On Error GoTo 0
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 0) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 0) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 1) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 1) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
87
.history/eohi1/brierVARS_20250922125338.vb
Normal file
87
.history/eohi1/brierVARS_20250922125338.vb
Normal file
@ -0,0 +1,87 @@
|
||||
Function GetCol(headerName As String, ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 0) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 0) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 1) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 1) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
87
.history/eohi1/brierVARS_20250922125353.vb
Normal file
87
.history/eohi1/brierVARS_20250922125353.vb
Normal file
@ -0,0 +1,87 @@
|
||||
Function GetCol(headerName As String, ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 0) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 0) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 1) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 1) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
87
.history/eohi1/brierVARS_20250922125648.vb
Normal file
87
.history/eohi1/brierVARS_20250922125648.vb
Normal file
@ -0,0 +1,87 @@
|
||||
Function GetCol(headerName As String, ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 0) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 0) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 1) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 1) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
87
.history/eohi1/brierVARS_20250922125759.vb
Normal file
87
.history/eohi1/brierVARS_20250922125759.vb
Normal file
@ -0,0 +1,87 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 0) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 0) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 1) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 1) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
87
.history/eohi1/brierVARS_20250922125807.vb
Normal file
87
.history/eohi1/brierVARS_20250922125807.vb
Normal file
@ -0,0 +1,87 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 0) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 0) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 1) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 1) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
87
.history/eohi1/brierVARS_20250922125821.vb
Normal file
87
.history/eohi1/brierVARS_20250922125821.vb
Normal file
@ -0,0 +1,87 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 0) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 0) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = ws.Cells(r, colSourceCON).Value
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
If checkVal = "TRUE" Then
|
||||
result = ((val / 100) - 1) ^ 2
|
||||
ElseIf checkVal = "FALSE" Then
|
||||
result = ((1 - (val / 100)) - 1) ^ 2
|
||||
Else
|
||||
result = CVErr(xlErrNA)
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
141
.history/eohi1/brierVARS_20250922130226.vb
Normal file
141
.history/eohi1/brierVARS_20250922130226.vb
Normal file
@ -0,0 +1,141 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Private Function NormalizeTruth(ByVal v As Variant) As Variant
|
||||
' Returns True/False or Null if cannot determine
|
||||
If VarType(v) = vbBoolean Then
|
||||
NormalizeTruth = v
|
||||
Exit Function
|
||||
End If
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeTruth = Null
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = Trim$(UCase$(CStr(v)))
|
||||
Select Case s
|
||||
Case "TRUE", "T", "1", "YES", "Y"
|
||||
NormalizeTruth = True
|
||||
Case "FALSE", "F", "0", "NO", "N"
|
||||
NormalizeTruth = False
|
||||
Case Else
|
||||
NormalizeTruth = Null
|
||||
End Select
|
||||
End Function
|
||||
|
||||
Private Function NormalizeProb01(ByVal v As Variant) As Double
|
||||
' Converts confidence values to [0,1]
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeProb01 = -1
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = CStr(v)
|
||||
If InStr(s, "%") > 0 Then
|
||||
s = Replace$(s, "%", "")
|
||||
If IsNumeric(s) Then
|
||||
NormalizeProb01 = CDbl(s) / 100#
|
||||
Exit Function
|
||||
End If
|
||||
End If
|
||||
If IsNumeric(v) Then
|
||||
Dim d As Double
|
||||
d = CDbl(v)
|
||||
If d > 1# Then
|
||||
NormalizeProb01 = d / 100#
|
||||
Else
|
||||
NormalizeProb01 = d
|
||||
End If
|
||||
Else
|
||||
NormalizeProb01 = -1
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth As Variant
|
||||
truth = NormalizeTruth(checkVal)
|
||||
If IsNull(truth) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth = True Then
|
||||
result = (val - 0#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 0#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth2 As Variant
|
||||
truth2 = NormalizeTruth(checkVal)
|
||||
If IsNull(truth2) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth2 = True Then
|
||||
result = (val - 1#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 1#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
141
.history/eohi1/brierVARS_20250922130242.vb
Normal file
141
.history/eohi1/brierVARS_20250922130242.vb
Normal file
@ -0,0 +1,141 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Private Function NormalizeTruth(ByVal v As Variant) As Variant
|
||||
' Returns True/False or Null if cannot determine
|
||||
If VarType(v) = vbBoolean Then
|
||||
NormalizeTruth = v
|
||||
Exit Function
|
||||
End If
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeTruth = Null
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = Trim$(UCase$(CStr(v)))
|
||||
Select Case s
|
||||
Case "TRUE", "T", "1", "YES", "Y"
|
||||
NormalizeTruth = True
|
||||
Case "FALSE", "F", "0", "NO", "N"
|
||||
NormalizeTruth = False
|
||||
Case Else
|
||||
NormalizeTruth = Null
|
||||
End Select
|
||||
End Function
|
||||
|
||||
Private Function NormalizeProb01(ByVal v As Variant) As Double
|
||||
' Converts confidence values to [0,1]
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeProb01 = -1
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = CStr(v)
|
||||
If InStr(s, "%") > 0 Then
|
||||
s = Replace$(s, "%", "")
|
||||
If IsNumeric(s) Then
|
||||
NormalizeProb01 = CDbl(s) / 100#
|
||||
Exit Function
|
||||
End If
|
||||
End If
|
||||
If IsNumeric(v) Then
|
||||
Dim d As Double
|
||||
d = CDbl(v)
|
||||
If d > 1# Then
|
||||
NormalizeProb01 = d / 100#
|
||||
Else
|
||||
NormalizeProb01 = d
|
||||
End If
|
||||
Else
|
||||
NormalizeProb01 = -1
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth As Variant
|
||||
truth = NormalizeTruth(checkVal)
|
||||
If IsNull(truth) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth = True Then
|
||||
result = (val - 0#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 0#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth2 As Variant
|
||||
truth2 = NormalizeTruth(checkVal)
|
||||
If IsNull(truth2) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth2 = True Then
|
||||
result = (val - 1#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 1#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
141
.history/eohi1/brierVARS_20250922130423.vb
Normal file
141
.history/eohi1/brierVARS_20250922130423.vb
Normal file
@ -0,0 +1,141 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Private Function NormalizeTruth(ByVal v As Variant) As Variant
|
||||
' Returns True/False or Null if cannot determine
|
||||
If VarType(v) = vbBoolean Then
|
||||
NormalizeTruth = v
|
||||
Exit Function
|
||||
End If
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeTruth = Null
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = Trim$(UCase$(CStr(v)))
|
||||
Select Case s
|
||||
Case "TRUE", "T", "1", "YES", "Y"
|
||||
NormalizeTruth = True
|
||||
Case "FALSE", "F", "0", "NO", "N"
|
||||
NormalizeTruth = False
|
||||
Case Else
|
||||
NormalizeTruth = Null
|
||||
End Select
|
||||
End Function
|
||||
|
||||
Private Function NormalizeProb01(ByVal v As Variant) As Double
|
||||
' Converts confidence values to [0,1]
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeProb01 = -1
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = CStr(v)
|
||||
If InStr(s, "%") > 0 Then
|
||||
s = Replace$(s, "%", "")
|
||||
If IsNumeric(s) Then
|
||||
NormalizeProb01 = CDbl(s) / 100#
|
||||
Exit Function
|
||||
End If
|
||||
End If
|
||||
If IsNumeric(v) Then
|
||||
Dim d As Double
|
||||
d = CDbl(v)
|
||||
If d > 1# Then
|
||||
NormalizeProb01 = d / 100#
|
||||
Else
|
||||
NormalizeProb01 = d
|
||||
End If
|
||||
Else
|
||||
NormalizeProb01 = -1
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth As Variant
|
||||
truth = NormalizeTruth(checkVal)
|
||||
If IsNull(truth) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth = True Then
|
||||
result = (val - 0#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 0#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth2 As Variant
|
||||
truth2 = NormalizeTruth(checkVal)
|
||||
If IsNull(truth2) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth2 = True Then
|
||||
result = (val - 1#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 1#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
141
.history/eohi1/brierVARS_20250922130433.vb
Normal file
141
.history/eohi1/brierVARS_20250922130433.vb
Normal file
@ -0,0 +1,141 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Private Function NormalizeTruth(ByVal v As Variant) As Variant
|
||||
' Returns True/False or Null if cannot determine
|
||||
If VarType(v) = vbBoolean Then
|
||||
NormalizeTruth = v
|
||||
Exit Function
|
||||
End If
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeTruth = Null
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = Trim$(UCase$(CStr(v)))
|
||||
Select Case s
|
||||
Case "TRUE", "T", "1", "YES", "Y"
|
||||
NormalizeTruth = True
|
||||
Case "FALSE", "F", "0", "NO", "N"
|
||||
NormalizeTruth = False
|
||||
Case Else
|
||||
NormalizeTruth = Null
|
||||
End Select
|
||||
End Function
|
||||
|
||||
Private Function NormalizeProb01(ByVal v As Variant) As Double
|
||||
' Converts confidence values to [0,1]
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeProb01 = -1
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = CStr(v)
|
||||
If InStr(s, "%") > 0 Then
|
||||
s = Replace$(s, "%", "")
|
||||
If IsNumeric(s) Then
|
||||
NormalizeProb01 = CDbl(s) / 100#
|
||||
Exit Function
|
||||
End If
|
||||
End If
|
||||
If IsNumeric(v) Then
|
||||
Dim d As Double
|
||||
d = CDbl(v)
|
||||
If d > 1# Then
|
||||
NormalizeProb01 = d / 100#
|
||||
Else
|
||||
NormalizeProb01 = d
|
||||
End If
|
||||
Else
|
||||
NormalizeProb01 = -1
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth As Variant
|
||||
truth = NormalizeTruth(checkVal)
|
||||
If IsNull(truth) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth = True Then
|
||||
result = (val - 0#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 0#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth2 As Variant
|
||||
truth2 = NormalizeTruth(checkVal)
|
||||
If IsNull(truth2) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth2 = True Then
|
||||
result = (val - 1#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 1#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
141
.history/eohi1/brierVARS_20250922130435.vb
Normal file
141
.history/eohi1/brierVARS_20250922130435.vb
Normal file
@ -0,0 +1,141 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
|
||||
Dim lastCol As Long
|
||||
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
|
||||
On Error Resume Next
|
||||
Dim m As Variant
|
||||
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
|
||||
On Error GoTo 0
|
||||
If IsError(m) Then
|
||||
GetCol = 0
|
||||
Else
|
||||
GetCol = CLng(m)
|
||||
End If
|
||||
End Function
|
||||
|
||||
Private Function NormalizeTruth(ByVal v As Variant) As Variant
|
||||
' Returns True/False or Null if cannot determine
|
||||
If VarType(v) = vbBoolean Then
|
||||
NormalizeTruth = v
|
||||
Exit Function
|
||||
End If
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeTruth = Null
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = Trim$(UCase$(CStr(v)))
|
||||
Select Case s
|
||||
Case "TRUE", "T", "1", "YES", "Y"
|
||||
NormalizeTruth = True
|
||||
Case "FALSE", "F", "0", "NO", "N"
|
||||
NormalizeTruth = False
|
||||
Case Else
|
||||
NormalizeTruth = Null
|
||||
End Select
|
||||
End Function
|
||||
|
||||
Private Function NormalizeProb01(ByVal v As Variant) As Double
|
||||
' Converts confidence values to [0,1]
|
||||
If IsError(v) Or IsEmpty(v) Then
|
||||
NormalizeProb01 = -1
|
||||
Exit Function
|
||||
End If
|
||||
Dim s As String
|
||||
s = CStr(v)
|
||||
If InStr(s, "%") > 0 Then
|
||||
s = Replace$(s, "%", "")
|
||||
If IsNumeric(s) Then
|
||||
NormalizeProb01 = CDbl(s) / 100#
|
||||
Exit Function
|
||||
End If
|
||||
End If
|
||||
If IsNumeric(v) Then
|
||||
Dim d As Double
|
||||
d = CDbl(v)
|
||||
If d > 1# Then
|
||||
NormalizeProb01 = d / 100#
|
||||
Else
|
||||
NormalizeProb01 = d
|
||||
End If
|
||||
Else
|
||||
NormalizeProb01 = -1
|
||||
End If
|
||||
End Function
|
||||
|
||||
Sub brierVARS()
|
||||
Dim false_vars As Variant
|
||||
Dim true_vars As Variant
|
||||
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
|
||||
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
|
||||
Dim target_headers As Variant
|
||||
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
|
||||
Dim ws As Worksheet
|
||||
Set ws = ThisWorkbook.Sheets(1)
|
||||
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
|
||||
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
|
||||
Dim result As Variant
|
||||
Dim r As Long, j As Long
|
||||
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
|
||||
For i = 0 To UBound(false_vars) Step 2
|
||||
srcVar = false_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(false_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth As Variant
|
||||
truth = NormalizeTruth(checkVal)
|
||||
If IsNull(truth) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth = True Then
|
||||
result = (val - 0#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 0#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
For i = 0 To UBound(true_vars) Step 2
|
||||
srcVar = true_vars(i)
|
||||
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
|
||||
colSource1 = GetCol(srcVar, ws)
|
||||
colSourceCON = GetCol(true_vars(i + 1), ws)
|
||||
If colSource1 > 0 And colSourceCON > 0 Then
|
||||
For r = 2 To rowCount
|
||||
checkVal = ws.Cells(r, colSource1).Value
|
||||
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
|
||||
colTarget = 0
|
||||
For j = 0 To UBound(target_headers)
|
||||
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
|
||||
colTarget = GetCol(target_headers(j), ws)
|
||||
Exit For
|
||||
End If
|
||||
Next j
|
||||
If colTarget > 0 Then
|
||||
Dim truth2 As Variant
|
||||
truth2 = NormalizeTruth(checkVal)
|
||||
If IsNull(truth2) Or val < 0 Then
|
||||
result = CVErr(xlErrNA)
|
||||
ElseIf truth2 = True Then
|
||||
result = (val - 1#) ^ 2
|
||||
Else
|
||||
result = ((1# - val) - 1#) ^ 2
|
||||
End If
|
||||
ws.Cells(r, colTarget).Value = result
|
||||
End If
|
||||
Next r
|
||||
End If
|
||||
Next i
|
||||
End Sub
|
||||
141
.history/eohi1/brierVARS_20250922131020.vb
Normal file
141
.history/eohi1/brierVARS_20250922131020.vb
Normal file
@ -0,0 +1,141 @@
|
||||
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Return the 1-based column number whose row-1 header equals headerName,
    ' or 0 when the header is not present on the sheet.
    Dim headerRow As Range
    Dim hit As Variant

    Set headerRow = ws.Range(ws.Cells(1, 1), _
                             ws.Cells(1, ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column))

    On Error Resume Next
    hit = Application.Match(headerName, headerRow, 0)
    On Error GoTo 0

    If IsError(hit) Then
        GetCol = 0
    Else
        GetCol = CLng(hit)
    End If
End Function
|
||||
|
||||
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    ' Interpret a raw cell value as a boolean response.
    ' Returns True/False, or Null when the value cannot be interpreted
    ' (errors, empties, and unrecognized text all map to Null).
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        Dim token As String
        token = UCase$(Trim$(CStr(v)))
        If token = "TRUE" Or token = "T" Or token = "1" Or token = "YES" Or token = "Y" Then
            NormalizeTruth = True
        ElseIf token = "FALSE" Or token = "F" Or token = "0" Or token = "NO" Or token = "N" Then
            NormalizeTruth = False
        Else
            NormalizeTruth = Null
        End If
    End If
End Function
|
||||
|
||||
Private Function NormalizeProb01(ByVal v As Variant) As Double
    ' Coerce a confidence cell value onto the [0,1] scale.
    ' "85%" -> 0.85; bare numbers above 1 are treated as percentages.
    ' Returns -1 as a sentinel for values that cannot be interpreted
    ' (errors, empties, non-numeric text).
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If

    ' Percent-formatted text such as "85%"
    Dim txt As String
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        txt = Replace$(txt, "%", "")
        If IsNumeric(txt) Then
            NormalizeProb01 = CDbl(txt) / 100#
            Exit Function
        End If
    End If

    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
    Else
        Dim p As Double
        p = CDbl(v)
        If p > 1# Then p = p / 100#   ' e.g. 85 means 85%
        NormalizeProb01 = p
    End If
End Function
|
||||
|
||||
Sub brierVARS()
    ' Compute a per-item Brier-score component for every participant row and
    ' write it into the matching "<item>_..._ex/easy/hard" target column.
    ' false_vars / true_vars list (response header, confidence header) pairs
    ' for items whose correct answer is False / True respectively.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")

    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)

    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row

    ' One pass per outcome class; all scoring logic lives in the helper.
    ScoreBrierPairs ws, false_vars, target_headers, rowCount, False
    ScoreBrierPairs ws, true_vars, target_headers, rowCount, True
End Sub

Private Sub ScoreBrierPairs(ByVal ws As Worksheet, ByVal pairVars As Variant, _
                            ByVal target_headers As Variant, ByVal rowCount As Long, _
                            ByVal outcomeTrue As Boolean)
    ' Score one (response, confidence) pair list against its target columns.
    ' outcomeTrue: the correct truth value shared by every item in pairVars.
    ' Writes (p - outcome)^2 per row, where p is the probability the
    ' participant implicitly assigned to "True" (confidence if they answered
    ' True, 1 - confidence if they answered False); #N/A when the response
    ' or confidence cannot be interpreted.
    Dim outcome As Double
    If outcomeTrue Then outcome = 1# Else outcome = 0#

    Dim i As Long, j As Long, r As Long
    Dim colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim srcVar As String, itemPrefix As String
    ' Variant (not String): error cells must survive the read so that
    ' NormalizeTruth's IsError branch can map them to Null instead of
    ' raising a Type Mismatch here.
    Dim checkVal As Variant
    Dim truth As Variant
    Dim val As Double
    Dim result As Variant

    For i = 0 To UBound(pairVars) Step 2
        srcVar = pairVars(i)
        ' "moza_55_F_1" -> "moza": the item tag used to match a target header.
        itemPrefix = Left(srcVar, InStr(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(CStr(pairVars(i + 1)), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' The target column depends only on the item, not the row,
            ' so resolve it once per item instead of once per row.
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = itemPrefix Then
                    colTarget = GetCol(CStr(target_headers(j)), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    checkVal = ws.Cells(r, colSource1).Value
                    val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    truth = NormalizeTruth(checkVal)
                    If IsNull(truth) Or val < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (val - outcome) ^ 2
                    Else
                        result = ((1# - val) - outcome) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub
|
||||
0
.history/eohi1/correlation matrix_20251027115053.r
Normal file
0
.history/eohi1/correlation matrix_20251027115053.r
Normal file
40
.history/eohi1/correlation matrix_20251027115054.r
Normal file
40
.history/eohi1/correlation matrix_20251027115054.r
Normal file
@ -0,0 +1,40 @@
|
||||
# EOHI study 1 descriptives: load the survey export, dummy-code sex,
# and print basic descriptives.  Diagnostic expressions are wrapped in
# print() so they also appear when this script is run via source().
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# str() prints on its own; the other diagnostics need print() under source()
str(data)
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)

# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives

# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))

# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)

# Verify the centering
print(summary(data$age_centered))

# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))

# Descriptives for sex dummy variable
print(table(data$sex_dummy))
|
||||
50
.history/eohi1/correlation matrix_20251027115900.r
Normal file
50
.history/eohi1/correlation matrix_20251027115900.r
Normal file
@ -0,0 +1,50 @@
|
||||
# EOHI study 1 descriptives: load the survey export, dummy-code sex,
# center age, and recode education as an ordered numeric variable.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
50
.history/eohi1/correlation matrix_20251027115902.r
Normal file
50
.history/eohi1/correlation matrix_20251027115902.r
Normal file
@ -0,0 +1,50 @@
|
||||
# EOHI study 1 descriptives: load the survey export, dummy-code sex,
# center age, and recode education as an ordered numeric variable.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
50
.history/eohi1/correlation matrix_20251027115905.r
Normal file
50
.history/eohi1/correlation matrix_20251027115905.r
Normal file
@ -0,0 +1,50 @@
|
||||
# EOHI study 1 descriptives: load the survey export, dummy-code sex,
# center age, and recode education as an ordered numeric variable.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
50
.history/eohi1/correlation matrix_20251027120022.r
Normal file
50
.history/eohi1/correlation matrix_20251027120022.r
Normal file
@ -0,0 +1,50 @@
|
||||
# EOHI study 1 descriptives: load the survey export, dummy-code sex,
# center age, and recode education as an ordered numeric variable.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
67
.history/eohi1/correlation matrix_20251027120056.r
Normal file
67
.history/eohi1/correlation matrix_20251027120056.r
Normal file
@ -0,0 +1,67 @@
|
||||
# EOHI study 1: descriptives plus a Pearson correlation matrix over the
# numeric study variables, saved to correlation_matrix.csv.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### correlation matrix ####

# Numeric variables only (recoded sex/education, centered age)
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, age_centered, edu_num,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global)

# Pearson correlations on complete cases
cor_matrix <- cor(numeric_vars, use = "complete.obs")

print("Correlation Matrix:")
print(round(cor_matrix, 3))

# Persist the full-precision matrix
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
67
.history/eohi1/correlation matrix_20251027120100.r
Normal file
67
.history/eohi1/correlation matrix_20251027120100.r
Normal file
@ -0,0 +1,67 @@
|
||||
# EOHI study 1: descriptives plus a Pearson correlation matrix over the
# numeric study variables, saved to correlation_matrix.csv.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### correlation matrix ####

# Numeric variables only (recoded sex/education, centered age)
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, age_centered, edu_num,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global)

# Pearson correlations on complete cases
cor_matrix <- cor(numeric_vars, use = "complete.obs")

print("Correlation Matrix:")
print(round(cor_matrix, 3))

# Persist the full-precision matrix
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
67
.history/eohi1/correlation matrix_20251027120122.r
Normal file
67
.history/eohi1/correlation matrix_20251027120122.r
Normal file
@ -0,0 +1,67 @@
|
||||
# EOHI study 1: descriptives plus a Pearson correlation matrix over the
# numeric study variables, saved to correlation_matrix.csv.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### correlation matrix ####

# Numeric variables only (recoded sex/education, centered age)
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, age_centered, edu_num,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global)

# Pearson correlations on complete cases
cor_matrix <- cor(numeric_vars, use = "complete.obs")

print("Correlation Matrix:")
print(round(cor_matrix, 3))

# Persist the full-precision matrix
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
67
.history/eohi1/correlation matrix_20251027120345.r
Normal file
67
.history/eohi1/correlation matrix_20251027120345.r
Normal file
@ -0,0 +1,67 @@
|
||||
# EOHI study 1: descriptives plus a Spearman correlation matrix over the
# numeric study variables (raw age, not centered), saved to
# correlation_matrix.csv.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### correlation matrix ####

# Numeric variables only; Spearman is rank-based, so raw age is used
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global)

# Spearman correlations on complete cases
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")

print("Correlation Matrix:")
print(round(cor_matrix, 3))

# Persist the full-precision matrix
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
67
.history/eohi1/correlation matrix_20251027120348.r
Normal file
67
.history/eohi1/correlation matrix_20251027120348.r
Normal file
@ -0,0 +1,67 @@
|
||||
# EOHI study 1: descriptives plus a Spearman correlation matrix over the
# numeric study variables (raw age, not centered), saved to
# correlation_matrix.csv.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### correlation matrix ####

# Numeric variables only; Spearman is rank-based, so raw age is used
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global)

# Spearman correlations on complete cases
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")

print("Correlation Matrix:")
print(round(cor_matrix, 3))

# Persist the full-precision matrix
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
67
.history/eohi1/correlation matrix_20251027120351.r
Normal file
67
.history/eohi1/correlation matrix_20251027120351.r
Normal file
@ -0,0 +1,67 @@
|
||||
# EOHI study 1: descriptives plus a Spearman correlation matrix over the
# numeric study variables (raw age, not centered), saved to
# correlation_matrix.csv.
options(scipen = 999)

library(dplyr)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

df <- read.csv("ehi1.csv")

data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness and column types
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex as a 0/1 dummy (Female = 1), with a cross-tab to confirm the coding
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# --- descriptives ---

# Age: summary, SD, then a mean-centered version
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies (raw and dummy)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education: ordered factor -> numeric codes 1..3 for correlations,
# then two tables to verify the mapping
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### correlation matrix ####

# Numeric variables only; Spearman is rank-based, so raw age is used
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num,
         AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard,
         cal_selfActual, cal_global)

# Spearman correlations on complete cases
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")

print("Correlation Matrix:")
print(round(cor_matrix, 3))

# Persist the full-precision matrix
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
76
.history/eohi1/correlation matrix_20251027120448.r
Normal file
76
.history/eohi1/correlation matrix_20251027120448.r
Normal file
@ -0,0 +1,76 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations
|
||||
library(Hmisc)
|
||||
cor_test <- rcorr(as.matrix(numeric_vars), type = "spearman")
|
||||
|
||||
# Print correlation matrix with significance
|
||||
print("Correlation Matrix with Significance:")
|
||||
print(cor_test$r)
|
||||
print("\nP-values:")
|
||||
print(cor_test$P)
|
||||
|
||||
# Save correlation matrix to CSV
|
||||
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
76
.history/eohi1/correlation matrix_20251027120450.r
Normal file
76
.history/eohi1/correlation matrix_20251027120450.r
Normal file
@ -0,0 +1,76 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations
|
||||
library(Hmisc)
|
||||
cor_test <- rcorr(as.matrix(numeric_vars), type = "spearman")
|
||||
|
||||
# Print correlation matrix with significance
|
||||
print("Correlation Matrix with Significance:")
|
||||
print(cor_test$r)
|
||||
print("\nP-values:")
|
||||
print(cor_test$P)
|
||||
|
||||
# Save correlation matrix to CSV
|
||||
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
76
.history/eohi1/correlation matrix_20251027120505.r
Normal file
76
.history/eohi1/correlation matrix_20251027120505.r
Normal file
@ -0,0 +1,76 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations
|
||||
library(Hmisc)
|
||||
cor_test <- rcorr(as.matrix(numeric_vars), type = "spearman")
|
||||
|
||||
# Print correlation matrix with significance
|
||||
print("Correlation Matrix with Significance:")
|
||||
print(cor_test$r)
|
||||
print("\nP-values:")
|
||||
print(cor_test$P)
|
||||
|
||||
# Save correlation matrix to CSV
|
||||
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
97
.history/eohi1/correlation matrix_20251027120720.r
Normal file
97
.history/eohi1/correlation matrix_20251027120720.r
Normal file
@ -0,0 +1,97 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print("Correlation Matrix:")
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print("\nP-values:")
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print significant correlations (p < 0.05)
|
||||
print("\nSignificant correlations (p < 0.05):")
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print("No significant correlations found.")
|
||||
}
|
||||
|
||||
# Save correlation matrix to CSV
|
||||
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
97
.history/eohi1/correlation matrix_20251027120722.r
Normal file
97
.history/eohi1/correlation matrix_20251027120722.r
Normal file
@ -0,0 +1,97 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print("Correlation Matrix:")
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print("\nP-values:")
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print significant correlations (p < 0.05)
|
||||
print("\nSignificant correlations (p < 0.05):")
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print("No significant correlations found.")
|
||||
}
|
||||
|
||||
# Save correlation matrix to CSV
|
||||
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
99
.history/eohi1/correlation matrix_20251027120752.r
Normal file
99
.history/eohi1/correlation matrix_20251027120752.r
Normal file
@ -0,0 +1,99 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print("Correlation Matrix:")
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print("\nP-values:")
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print significant correlations (p < 0.05)
|
||||
print("\nSignificant correlations (p < 0.05):")
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print("No significant correlations found.")
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
99
.history/eohi1/correlation matrix_20251027120754.r
Normal file
99
.history/eohi1/correlation matrix_20251027120754.r
Normal file
@ -0,0 +1,99 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print("Correlation Matrix:")
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print("\nP-values:")
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print significant correlations (p < 0.05)
|
||||
print("\nSignificant correlations (p < 0.05):")
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print("No significant correlations found.")
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
99
.history/eohi1/correlation matrix_20251027120804.r
Normal file
99
.history/eohi1/correlation matrix_20251027120804.r
Normal file
@ -0,0 +1,99 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print("Correlation Matrix:")
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print("\nP-values:")
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print significant correlations (p < 0.05)
|
||||
print("\nSignificant correlations (p < 0.05):")
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print("No significant correlations found.")
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
111
.history/eohi1/correlation matrix_20251027120919.r
Normal file
111
.history/eohi1/correlation matrix_20251027120919.r
Normal file
@ -0,0 +1,111 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print("Correlation Matrix (Spearman r values):")
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print("\nP-values Matrix:")
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print all correlations with r and p values (for reporting)
|
||||
print("\nAll correlations with r and p values:")
|
||||
for(i in 1:nrow(cor_test$r)) {
|
||||
for(j in 1:ncol(cor_test$r)) {
|
||||
if(i != j) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||
": r =", round(cor_test$r[i, j], 3),
|
||||
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Also print significant correlations summary
|
||||
print("\nSignificant correlations (p < 0.05):")
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print("No significant correlations found.")
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
111
.history/eohi1/correlation matrix_20251027120930.r
Normal file
111
.history/eohi1/correlation matrix_20251027120930.r
Normal file
@ -0,0 +1,111 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print("Correlation Matrix (Spearman r values):")
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print("\nP-values Matrix:")
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print all correlations with r and p values (for reporting)
|
||||
print("\nAll correlations with r and p values:")
|
||||
for(i in 1:nrow(cor_test$r)) {
|
||||
for(j in 1:ncol(cor_test$r)) {
|
||||
if(i != j) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||
": r =", round(cor_test$r[i, j], 3),
|
||||
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Also print significant correlations summary
|
||||
print("\nSignificant correlations (p < 0.05):")
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print("No significant correlations found.")
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
111
.history/eohi1/correlation matrix_20251027120933.r
Normal file
111
.history/eohi1/correlation matrix_20251027120933.r
Normal file
@ -0,0 +1,111 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print("Correlation Matrix (Spearman r values):")
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print("\nP-values Matrix:")
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print all correlations with r and p values (for reporting)
|
||||
print("\nAll correlations with r and p values:")
|
||||
for(i in 1:nrow(cor_test$r)) {
|
||||
for(j in 1:ncol(cor_test$r)) {
|
||||
if(i != j) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||
": r =", round(cor_test$r[i, j], 3),
|
||||
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Also print significant correlations summary
|
||||
print("\nSignificant correlations (p < 0.05):")
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
print("No significant correlations found.")
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
105
.history/eohi1/correlation matrix_20251027120955.r
Normal file
105
.history/eohi1/correlation matrix_20251027120955.r
Normal file
@ -0,0 +1,105 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print all correlations with r and p values (for reporting)
|
||||
for(i in 1:nrow(cor_test$r)) {
|
||||
for(j in 1:ncol(cor_test$r)) {
|
||||
if(i != j) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||
": r =", round(cor_test$r[i, j], 3),
|
||||
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Also print significant correlations summary
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
105
.history/eohi1/correlation matrix_20251027120958.r
Normal file
105
.history/eohi1/correlation matrix_20251027120958.r
Normal file
@ -0,0 +1,105 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print all correlations with r and p values (for reporting)
|
||||
for(i in 1:nrow(cor_test$r)) {
|
||||
for(j in 1:ncol(cor_test$r)) {
|
||||
if(i != j) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||
": r =", round(cor_test$r[i, j], 3),
|
||||
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Also print significant correlations summary
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
105
.history/eohi1/correlation matrix_20251027121016.r
Normal file
105
.history/eohi1/correlation matrix_20251027121016.r
Normal file
@ -0,0 +1,105 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print all correlations with r and p values (for reporting)
|
||||
for(i in 1:nrow(cor_test$r)) {
|
||||
for(j in 1:ncol(cor_test$r)) {
|
||||
if(i != j) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||
": r =", round(cor_test$r[i, j], 3),
|
||||
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Also print significant correlations summary
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
105
.history/eohi1/correlation matrix_20251027134544.r
Normal file
105
.history/eohi1/correlation matrix_20251027134544.r
Normal file
@ -0,0 +1,105 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print all correlations with r and p values (for reporting)
|
||||
for(i in 1:nrow(cor_test$r)) {
|
||||
for(j in 1:ncol(cor_test$r)) {
|
||||
if(i != j) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||
": r =", round(cor_test$r[i, j], 3),
|
||||
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Also print significant correlations summary
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
print("Correlation matrix saved to correlation_matrix.csv")
|
||||
print("P-values saved to correlation_pvalues.csv")
|
||||
103
.history/eohi1/correlation matrix_20251029115844.r
Normal file
103
.history/eohi1/correlation matrix_20251029115844.r
Normal file
@ -0,0 +1,103 @@
|
||||
options(scipen = 999)
|
||||
|
||||
library(dplyr)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
|
||||
df <- read.csv("ehi1.csv")
|
||||
|
||||
data <- df %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
|
||||
filter(demo_sex != "Prefer not to say")
|
||||
|
||||
print(colSums(is.na(data)))
|
||||
print(sapply(data, class))
|
||||
|
||||
# Create dummy variable for sex (0 = Male, 1 = Female)
|
||||
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
|
||||
|
||||
# Verify the dummy coding
|
||||
print(table(data$demo_sex, data$sex_dummy))
|
||||
|
||||
#descriptives
|
||||
|
||||
# Descriptives for age
|
||||
print(summary(data$demo_age_1))
|
||||
print(sd(data$demo_age_1, na.rm = TRUE))
|
||||
|
||||
# Center demo_age_1 (subtract the mean)
|
||||
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
|
||||
|
||||
# Verify the centering
|
||||
print(summary(data$age_centered))
|
||||
|
||||
# Descriptives for sex (frequency table)
|
||||
print(table(data$demo_sex))
|
||||
print(prop.table(table(data$demo_sex)))
|
||||
|
||||
# Descriptives for sex dummy variable
|
||||
print(table(data$sex_dummy))
|
||||
|
||||
# Convert edu3 to numeric factor for correlations (1, 2, 3)
|
||||
# First ensure edu3 is a factor, then convert to numeric
|
||||
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
|
||||
data$edu_num <- as.numeric(data$edu3)
|
||||
|
||||
# Check the numeric conversion
|
||||
print(table(data$edu_num, useNA = "ifany"))
|
||||
|
||||
# Verify the conversion
|
||||
print(table(data$edu3, data$edu_num, useNA = "ifany"))
|
||||
|
||||
####correlation matrix ####
|
||||
|
||||
# Select numeric variables for correlation matrix
|
||||
numeric_vars <- data %>%
|
||||
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
|
||||
|
||||
# Create Spearman correlation matrix
|
||||
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_matrix, 3))
|
||||
|
||||
# Get significance tests for correlations using psych package
|
||||
library(psych)
|
||||
|
||||
# Create correlation matrix with significance tests
|
||||
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
|
||||
|
||||
# Print correlation matrix
|
||||
print(round(cor_test$r, 3))
|
||||
|
||||
# Print p-values
|
||||
print(round(cor_test$p, 3))
|
||||
|
||||
# Print all correlations with r and p values (for reporting)
|
||||
for(i in 1:nrow(cor_test$r)) {
|
||||
for(j in 1:ncol(cor_test$r)) {
|
||||
if(i != j) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
|
||||
": r =", round(cor_test$r[i, j], 3),
|
||||
", p =", round(cor_test$p[i, j], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Also print significant correlations summary
|
||||
sig_cors <- which(cor_test$p < 0.05 & cor_test$p != 0, arr.ind = TRUE)
|
||||
if(nrow(sig_cors) > 0) {
|
||||
for(i in 1:nrow(sig_cors)) {
|
||||
row_idx <- sig_cors[i, 1]
|
||||
col_idx <- sig_cors[i, 2]
|
||||
if(row_idx != col_idx) { # Skip diagonal
|
||||
cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
|
||||
": r =", round(cor_test$r[row_idx, col_idx], 3),
|
||||
", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Save correlation matrix and p-values to CSV files
|
||||
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
|
||||
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
|
||||
@ -0,0 +1,67 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Remove columns with all NA values
|
||||
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (Pearson), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars) {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# If you want to export CSVs, uncomment:
|
||||
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
|
||||
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)
|
||||
@ -0,0 +1,67 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Remove columns with all NA values
|
||||
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (Pearson), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars) {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# If you want to export CSVs, uncomment:
|
||||
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
|
||||
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)
|
||||
@ -0,0 +1,74 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (Pearson), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars) {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# If you want to export CSVs, uncomment:
|
||||
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
|
||||
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)
|
||||
@ -0,0 +1,74 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (Pearson), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars) {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# If you want to export CSVs, uncomment:
|
||||
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
|
||||
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)
|
||||
@ -0,0 +1,74 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (Pearson), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars) {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# If you want to export CSVs, uncomment:
|
||||
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
|
||||
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
|
||||
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)
|
||||
@ -0,0 +1,75 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (Pearson), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars) {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,75 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (Pearson), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars) {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,75 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (Pearson), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars) {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
|
||||
"eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,86 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Pearson)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
|
||||
|
||||
# Compute correlations (Spearman)
|
||||
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_s_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_s_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
corr_s_bs_eohi$group <- "EOHI"
|
||||
corr_s_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,76 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Spearman only)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,76 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Spearman only)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,76 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Spearman only)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,76 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Spearman only)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_bs_cal)
|
||||
|
||||
# Export a single CSV combining both sets
|
||||
corr_bs_eohi$group <- "EOHI"
|
||||
corr_bs_cal$group <- "Calibration"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,81 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("ehi1.csv")
|
||||
|
||||
# Keep only required columns for the analysis
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# --- Brier score correlations vs EOHIs and Calibration ---
|
||||
|
||||
# Variables
|
||||
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
|
||||
eohi_vars <- c(
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
|
||||
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
|
||||
)
|
||||
cal_vars <- c("cal_selfActual","cal_global")
|
||||
|
||||
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
|
||||
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
|
||||
grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
|
||||
results <- purrr::pmap_dfr(grid, function(x, y) {
|
||||
xv <- df[[x]]; yv <- df[[y]]
|
||||
ok <- is.finite(xv) & is.finite(yv)
|
||||
if (sum(ok) < 3) {
|
||||
return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
|
||||
}
|
||||
ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
|
||||
tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
|
||||
})
|
||||
dplyr::arrange(results, var_x, var_y)
|
||||
}
|
||||
|
||||
# Compute correlations (Spearman only)
|
||||
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
|
||||
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
|
||||
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
|
||||
|
||||
# Wide r-only tables (optional)
|
||||
to_wide <- function(d) {
|
||||
tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
|
||||
}
|
||||
wide_bs_eohi <- to_wide(corr_bs_eohi)
|
||||
wide_bs_cal <- to_wide(corr_bs_cal)
|
||||
wide_cal_eohi <- to_wide(corr_cal_eohi)
|
||||
|
||||
# Display
|
||||
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_bs_eohi)
|
||||
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
|
||||
print(corr_bs_cal)
|
||||
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
|
||||
print(corr_cal_eohi)
|
||||
|
||||
# Export a single CSV combining all sets
|
||||
corr_bs_eohi$group <- "BS_vs_EOHI"
|
||||
corr_bs_cal$group <- "BS_vs_Cal"
|
||||
corr_cal_eohi$group <- "Cal_vs_EOHI"
|
||||
|
||||
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
|
||||
dplyr::relocate(group, .before = var_x)
|
||||
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,81 @@
|
||||
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)

# Print plain (non-scientific) numbers.
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load data
df1 <- read.csv("ehi1.csv")

# --- Brier score correlations vs EOHIs and Calibration ---

# Variable sets: Brier scores, EOHI indices, and calibration measures.
# (Previously declared twice with identical values; declared once here.)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual", "cal_global")

# Keep only the columns required for the analysis.
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# Helper: tidy pairwise-complete correlations between two variable sets.
# `method` is passed straight to cor.test ("pearson" or "spearman").
# Returns one row per (x, y) pair: var_x, var_y, n, r, p, method.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  pairs <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  out <- purrr::pmap_dfr(pairs, function(x, y) {
    keep <- is.finite(df[[x]]) & is.finite(df[[y]])
    n_ok <- sum(keep)
    # Fewer than 3 complete pairs: correlation is undefined, emit an NA row.
    if (n_ok < 3) {
      tibble::tibble(var_x = x, var_y = y, n = n_ok,
                     r = NA_real_, p = NA_real_, method = method)
    } else {
      ct <- suppressWarnings(cor.test(df[[x]][keep], df[[y]][keep], method = method))
      tibble::tibble(var_x = x, var_y = y, n = n_ok,
                     r = unname(ct$estimate), p = ct$p.value, method = method)
    }
  })
  dplyr::arrange(out, var_x, var_y)
}
|
||||
|
||||
# Spearman correlations between each pair of variable sets.
corr_bs_eohi  <- corr_tidy(df1, bs_vars,  eohi_vars, method = "spearman")
corr_bs_cal   <- corr_tidy(df1, bs_vars,  cal_vars,  method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")

# Optional wide tables holding only the correlation coefficients.
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi  <- to_wide(corr_bs_eohi)
wide_bs_cal   <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)

# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)

# Tag each result set, stack them, and export one combined CSV.
corr_bs_eohi$group  <- "BS_vs_EOHI"
corr_bs_cal$group   <- "BS_vs_Cal"
corr_cal_eohi$group <- "Cal_vs_EOHI"

corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,81 @@
|
||||
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)

# Print plain (non-scientific) numbers.
options(scipen = 999)

setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load data
df1 <- read.csv("ehi1.csv")

# --- Brier score correlations vs EOHIs and Calibration ---

# Variable sets: Brier scores, EOHI indices, and calibration measures.
# (Previously declared twice with identical values; declared once here.)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual", "cal_global")

# Keep only the columns required for the analysis.
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
|
||||
|
||||
# Helper: tidy pairwise-complete correlations between two variable sets.
# `method` is passed straight to cor.test ("pearson" or "spearman").
# Returns one row per (x, y) pair: var_x, var_y, n, r, p, method.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  pairs <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  out <- purrr::pmap_dfr(pairs, function(x, y) {
    keep <- is.finite(df[[x]]) & is.finite(df[[y]])
    n_ok <- sum(keep)
    # Fewer than 3 complete pairs: correlation is undefined, emit an NA row.
    if (n_ok < 3) {
      tibble::tibble(var_x = x, var_y = y, n = n_ok,
                     r = NA_real_, p = NA_real_, method = method)
    } else {
      ct <- suppressWarnings(cor.test(df[[x]][keep], df[[y]][keep], method = method))
      tibble::tibble(var_x = x, var_y = y, n = n_ok,
                     r = unname(ct$estimate), p = ct$p.value, method = method)
    }
  })
  dplyr::arrange(out, var_x, var_y)
}
|
||||
|
||||
# Spearman correlations between each pair of variable sets.
corr_bs_eohi  <- corr_tidy(df1, bs_vars,  eohi_vars, method = "spearman")
corr_bs_cal   <- corr_tidy(df1, bs_vars,  cal_vars,  method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")

# Optional wide tables holding only the correlation coefficients.
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi  <- to_wide(corr_bs_eohi)
wide_bs_cal   <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)

# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)

# Tag each result set, stack them, and export one combined CSV.
corr_bs_eohi$group  <- "BS_vs_EOHI"
corr_bs_cal$group   <- "BS_vs_Cal"
corr_cal_eohi$group <- "Cal_vs_EOHI"

corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)
|
||||
@ -0,0 +1,304 @@
|
||||
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)

# Print plain (non-scientific) numbers.
options(scipen = 999)

# Load data
df1 <- read.csv("exp1.csv")

# Drop columns that are entirely NA.
df1 <- df1 %>% select(where(~ !all(is.na(.))))

# Variables of interest: EOHI indices and calibration measures.
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
               "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global")

# Subset to the selected variables and force them all to numeric.
df <- df1[, c(eohi_vars, cal_vars)]
df <- df %>%
  mutate(across(everything(), as.numeric))

# Listwise-complete version used for the correlation matrices.
df_complete <- df[complete.cases(df), ]

cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
|
||||
|
||||
####==== DESCRIPTIVE STATISTICS ====

# Return one row per variable with n, mean, sd, min, max, median, q25, q75
# (all summary values rounded to 5 decimals; NAs ignored).
get_descriptives <- function(data, vars) {
  stats_wide <- data %>%
    select(all_of(vars)) %>%
    summarise(across(everything(), list(
      n = ~sum(!is.na(.)),
      mean = ~mean(., na.rm = TRUE),
      sd = ~sd(., na.rm = TRUE),
      min = ~min(., na.rm = TRUE),
      max = ~max(., na.rm = TRUE),
      median = ~median(., na.rm = TRUE),
      q25 = ~quantile(., 0.25, na.rm = TRUE),
      q75 = ~quantile(., 0.75, na.rm = TRUE)
    )))

  # Names come out as "<var>_<stat>"; split on the LAST underscore so that
  # variable names containing underscores survive intact.
  stats_wide %>%
    pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
    separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
    pivot_wider(names_from = stat, values_from = value) %>%
    mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
}
|
||||
|
||||
# Descriptive statistics for each variable set.
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)

cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====

# Correlation matrix with p-values via Hmisc::rcorr.
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
cor_pearson <- cor_results_pearson$r       # coefficients
p_matrix_pearson <- cor_results_pearson$P  # p-values
|
||||
|
||||
# Format a correlation matrix as strings with significance stars:
# *** p < .001, ** p < .01, * p < .05. Coefficients rounded to 5 decimals.
corstars <- function(cor_mat, p_mat) {
  # rcorr() leaves NA p-values on the diagonal; treat those as "no star"
  # instead of pasting a literal "NA" onto the coefficient string.
  stars <- ifelse(!is.na(p_mat) & p_mat < 0.001, "***",
                  ifelse(!is.na(p_mat) & p_mat < 0.01, "**",
                         ifelse(!is.na(p_mat) & p_mat < 0.05, "*", "")))

  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))

  # Carry over the dimension names from the input matrix.
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)

  cor_with_stars
}
|
||||
|
||||
# Formatted Pearson correlation table.
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)

cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)

# EOHI x Calibration sub-matrices, reported pair by pair.
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]

cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for (i in seq_len(nrow(eohi_cal_correlations))) {
  for (j in seq_len(ncol(eohi_cal_correlations))) {
    cor_val <- eohi_cal_correlations[i, j]
    p_val <- eohi_cal_pvalues[i, j]
    star <- ifelse(p_val < 0.001, "***",
                   ifelse(p_val < 0.01, "**",
                          ifelse(p_val < 0.05, "*", "")))
    cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
                rownames(eohi_cal_correlations)[i],
                colnames(eohi_cal_correlations)[j],
                cor_val, star, p_val))
  }
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====

# Spearman correlation matrix with p-values via Hmisc::rcorr.
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
cor_spearman <- cor_results_spearman$r
p_matrix_spearman <- cor_results_spearman$P

# Formatted Spearman correlation table.
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)

cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)

# EOHI x Calibration sub-matrices, reported pair by pair.
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]

cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for (i in seq_len(nrow(eohi_cal_correlations_spearman))) {
  for (j in seq_len(ncol(eohi_cal_correlations_spearman))) {
    cor_val <- eohi_cal_correlations_spearman[i, j]
    p_val <- eohi_cal_pvalues_spearman[i, j]
    star <- ifelse(p_val < 0.001, "***",
                   ifelse(p_val < 0.01, "**",
                          ifelse(p_val < 0.05, "*", "")))
    cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
                rownames(eohi_cal_correlations_spearman)[i],
                colnames(eohi_cal_correlations_spearman)[j],
                cor_val, star, p_val))
  }
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====

# Correlation between var1 and var2 with a bootstrap percentile 95% CI.
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
# Emits an all-NA row when fewer than 3 complete observations exist.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Use only rows complete on the two variables involved.
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]

  if(nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }

  # Statistic for boot(): correlation computed on the resampled rows.
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }

  # Perform bootstrap
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)

  # boot.ci() returns NULL (with a warning) when all replicates are
  # identical; report an NA interval in that case instead of erroring
  # on ci$perc indexing.
  ci <- boot.ci(boot_results, type = "perc")
  if (is.null(ci)) {
    ci_bounds <- c(NA_real_, NA_real_)
  } else {
    ci_bounds <- ci$perc[4:5]
  }

  return(data.frame(
    correlation = boot_results$t0,
    ci_lower = ci_bounds[1],
    ci_upper = ci_bounds[2],
    n = nrow(complete_data)
  ))
}
|
||||
|
||||
# Bootstrap CIs for every EOHI x Calibration pair, Pearson then Spearman.
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
  eohi_var = eohi_vars,
  cal_var = cal_vars,
  stringsAsFactors = FALSE
) %>%
  pmap_dfr(function(eohi_var, cal_var) {
    result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
    result$eohi_var <- eohi_var
    result$cal_var <- cal_var
    result$method <- "pearson"
    result
  }) %>%
  mutate(
    correlation = round(correlation, 5),
    ci_lower = round(ci_lower, 5),
    ci_upper = round(ci_upper, 5)
  )

print(bootstrap_results_pearson)

cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
  eohi_var = eohi_vars,
  cal_var = cal_vars,
  stringsAsFactors = FALSE
) %>%
  pmap_dfr(function(eohi_var, cal_var) {
    result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
    result$eohi_var <- eohi_var
    result$cal_var <- cal_var
    result$method <- "spearman"
    result
  }) %>%
  mutate(
    correlation = round(correlation, 5),
    ci_lower = round(ci_lower, 5),
    ci_upper = round(ci_upper, 5)
  )

print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====

# One row per EOHI x Calibration pair: Pearson r + CI, Spearman rho + CI,
# and the matrix-based p-values for both methods.
summary_table <- bootstrap_results_pearson %>%
  select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
  rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
  left_join(
    bootstrap_results_spearman %>%
      select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
      rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
    by = c("eohi_var", "cal_var")
  ) %>%
  left_join(
    # p-values pulled from the rcorr() matrices.
    expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
      pmap_dfr(function(eohi_var, cal_var) {
        data.frame(
          eohi_var = eohi_var,
          cal_var = cal_var,
          pearson_p = p_matrix_pearson[eohi_var, cal_var],
          spearman_p = p_matrix_spearman[eohi_var, cal_var]
        )
      }),
    by = c("eohi_var", "cal_var")
  ) %>%
  mutate(
    pearson_p = round(pearson_p, 5),
    spearman_p = round(spearman_p, 5)
  )

cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)

# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
|
||||
####==== EFFECT SIZES (Cohen's conventions) ====

cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")

# Label a vector of correlations with Cohen's effect-size category.
effect_label <- function(v) {
  case_when(
    abs(v) >= 0.50 ~ "Large",
    abs(v) >= 0.30 ~ "Medium",
    abs(v) >= 0.10 ~ "Small",
    TRUE ~ "Negligible"
  )
}

summary_table_with_effects <- summary_table %>%
  mutate(
    pearson_effect_size = effect_label(pearson_r),
    spearman_effect_size = effect_label(spearman_rho)
  )

cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))
|
||||
@ -0,0 +1,304 @@
|
||||
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)

# Print plain (non-scientific) numbers.
options(scipen = 999)

# Load data
df1 <- read.csv("exp1.csv")

# Drop columns that are entirely NA.
df1 <- df1 %>% select(where(~ !all(is.na(.))))

# Variables of interest: EOHI indices and calibration measures.
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
               "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global")

# Subset to the selected variables and force them all to numeric.
df <- df1[, c(eohi_vars, cal_vars)]
df <- df %>%
  mutate(across(everything(), as.numeric))

# Listwise-complete version used for the correlation matrices.
df_complete <- df[complete.cases(df), ]

cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
|
||||
|
||||
####==== DESCRIPTIVE STATISTICS ====

# Return one row per variable with n, mean, sd, min, max, median, q25, q75
# (all summary values rounded to 5 decimals; NAs ignored).
get_descriptives <- function(data, vars) {
  stats_wide <- data %>%
    select(all_of(vars)) %>%
    summarise(across(everything(), list(
      n = ~sum(!is.na(.)),
      mean = ~mean(., na.rm = TRUE),
      sd = ~sd(., na.rm = TRUE),
      min = ~min(., na.rm = TRUE),
      max = ~max(., na.rm = TRUE),
      median = ~median(., na.rm = TRUE),
      q25 = ~quantile(., 0.25, na.rm = TRUE),
      q75 = ~quantile(., 0.75, na.rm = TRUE)
    )))

  # Names come out as "<var>_<stat>"; split on the LAST underscore so that
  # variable names containing underscores survive intact.
  stats_wide %>%
    pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
    separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
    pivot_wider(names_from = stat, values_from = value) %>%
    mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
}
|
||||
|
||||
# Descriptive statistics for each variable set.
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)

cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====

# Correlation matrix with p-values via Hmisc::rcorr.
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
cor_pearson <- cor_results_pearson$r       # coefficients
p_matrix_pearson <- cor_results_pearson$P  # p-values
|
||||
|
||||
# Format a correlation matrix as strings with significance stars:
# *** p < .001, ** p < .01, * p < .05. Coefficients rounded to 5 decimals.
corstars <- function(cor_mat, p_mat) {
  # rcorr() leaves NA p-values on the diagonal; treat those as "no star"
  # instead of pasting a literal "NA" onto the coefficient string.
  stars <- ifelse(!is.na(p_mat) & p_mat < 0.001, "***",
                  ifelse(!is.na(p_mat) & p_mat < 0.01, "**",
                         ifelse(!is.na(p_mat) & p_mat < 0.05, "*", "")))

  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))

  # Carry over the dimension names from the input matrix.
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)

  cor_with_stars
}
|
||||
|
||||
# Formatted Pearson correlation table.
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)

cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)

# EOHI x Calibration sub-matrices, reported pair by pair.
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]

cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for (i in seq_len(nrow(eohi_cal_correlations))) {
  for (j in seq_len(ncol(eohi_cal_correlations))) {
    cor_val <- eohi_cal_correlations[i, j]
    p_val <- eohi_cal_pvalues[i, j]
    star <- ifelse(p_val < 0.001, "***",
                   ifelse(p_val < 0.01, "**",
                          ifelse(p_val < 0.05, "*", "")))
    cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
                rownames(eohi_cal_correlations)[i],
                colnames(eohi_cal_correlations)[j],
                cor_val, star, p_val))
  }
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====

# Spearman correlation matrix with p-values via Hmisc::rcorr.
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
cor_spearman <- cor_results_spearman$r
p_matrix_spearman <- cor_results_spearman$P

# Formatted Spearman correlation table.
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)

cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)

# EOHI x Calibration sub-matrices, reported pair by pair.
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]

cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for (i in seq_len(nrow(eohi_cal_correlations_spearman))) {
  for (j in seq_len(ncol(eohi_cal_correlations_spearman))) {
    cor_val <- eohi_cal_correlations_spearman[i, j]
    p_val <- eohi_cal_pvalues_spearman[i, j]
    star <- ifelse(p_val < 0.001, "***",
                   ifelse(p_val < 0.01, "**",
                          ifelse(p_val < 0.05, "*", "")))
    cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
                rownames(eohi_cal_correlations_spearman)[i],
                colnames(eohi_cal_correlations_spearman)[j],
                cor_val, star, p_val))
  }
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====

# Correlation between var1 and var2 with a bootstrap percentile 95% CI.
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
# Emits an all-NA row when fewer than 3 complete observations exist.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Use only rows complete on the two variables involved.
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]

  if(nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }

  # Statistic for boot(): correlation computed on the resampled rows.
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }

  # Perform bootstrap
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)

  # boot.ci() returns NULL (with a warning) when all replicates are
  # identical; report an NA interval in that case instead of erroring
  # on ci$perc indexing.
  ci <- boot.ci(boot_results, type = "perc")
  if (is.null(ci)) {
    ci_bounds <- c(NA_real_, NA_real_)
  } else {
    ci_bounds <- ci$perc[4:5]
  }

  return(data.frame(
    correlation = boot_results$t0,
    ci_lower = ci_bounds[1],
    ci_upper = ci_bounds[2],
    n = nrow(complete_data)
  ))
}
|
||||
|
||||
# Bootstrap CIs for every EOHI x Calibration pair, Pearson then Spearman.
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
  eohi_var = eohi_vars,
  cal_var = cal_vars,
  stringsAsFactors = FALSE
) %>%
  pmap_dfr(function(eohi_var, cal_var) {
    result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
    result$eohi_var <- eohi_var
    result$cal_var <- cal_var
    result$method <- "pearson"
    result
  }) %>%
  mutate(
    correlation = round(correlation, 5),
    ci_lower = round(ci_lower, 5),
    ci_upper = round(ci_upper, 5)
  )

print(bootstrap_results_pearson)

cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
  eohi_var = eohi_vars,
  cal_var = cal_vars,
  stringsAsFactors = FALSE
) %>%
  pmap_dfr(function(eohi_var, cal_var) {
    result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
    result$eohi_var <- eohi_var
    result$cal_var <- cal_var
    result$method <- "spearman"
    result
  }) %>%
  mutate(
    correlation = round(correlation, 5),
    ci_lower = round(ci_lower, 5),
    ci_upper = round(ci_upper, 5)
  )

print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====

# One row per EOHI x Calibration pair: Pearson r + CI, Spearman rho + CI,
# and the matrix-based p-values for both methods.
summary_table <- bootstrap_results_pearson %>%
  select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
  rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
  left_join(
    bootstrap_results_spearman %>%
      select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
      rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
    by = c("eohi_var", "cal_var")
  ) %>%
  left_join(
    # p-values pulled from the rcorr() matrices.
    expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
      pmap_dfr(function(eohi_var, cal_var) {
        data.frame(
          eohi_var = eohi_var,
          cal_var = cal_var,
          pearson_p = p_matrix_pearson[eohi_var, cal_var],
          spearman_p = p_matrix_spearman[eohi_var, cal_var]
        )
      }),
    by = c("eohi_var", "cal_var")
  ) %>%
  mutate(
    pearson_p = round(pearson_p, 5),
    spearman_p = round(spearman_p, 5)
  )

cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)

# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
|
||||
####==== EFFECT SIZES (Cohen's conventions) ====

cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")

# Label a vector of correlations with Cohen's effect-size category.
effect_label <- function(v) {
  case_when(
    abs(v) >= 0.50 ~ "Large",
    abs(v) >= 0.30 ~ "Medium",
    abs(v) >= 0.10 ~ "Small",
    TRUE ~ "Negligible"
  )
}

summary_table_with_effects <- summary_table %>%
  mutate(
    pearson_effect_size = effect_label(pearson_r),
    spearman_effect_size = effect_label(spearman_rho)
  )

cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))
|
||||
@ -0,0 +1,304 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Remove columns with all NA values
|
||||
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||
|
||||
# Select variables of interest
|
||||
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
||||
cal_vars <- c("cal_selfActual", "cal_global")
|
||||
|
||||
# Create dataset with selected variables
|
||||
df <- df1[, c(eohi_vars, cal_vars)]
|
||||
|
||||
# Ensure all selected variables are numeric
|
||||
df <- df %>%
|
||||
mutate(across(everything(), as.numeric))
|
||||
|
||||
# Remove rows with any missing values for correlation analysis
|
||||
df_complete <- df[complete.cases(df), ]
|
||||
|
||||
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
||||
cat("Total sample size:", nrow(df), "\n")
|
||||
|
||||
####==== DESCRIPTIVE STATISTICS ====
|
||||
|
||||
# Function to compute descriptive statistics
|
||||
get_descriptives <- function(data, vars) {
|
||||
desc_stats <- data %>%
|
||||
select(all_of(vars)) %>%
|
||||
summarise(across(everything(), list(
|
||||
n = ~sum(!is.na(.)),
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
||||
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
||||
))) %>%
|
||||
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
||||
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
||||
pivot_wider(names_from = stat, values_from = value) %>%
|
||||
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
||||
|
||||
return(desc_stats)
|
||||
}
|
||||
|
||||
# Get descriptives for EOHI variables
|
||||
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
||||
cat("\n=== EOHI Variables Descriptives ===\n")
|
||||
print(eohi_descriptives)
|
||||
|
||||
# Get descriptives for calibration variables
|
||||
cal_descriptives <- get_descriptives(df, cal_vars)
|
||||
cat("\n=== Calibration Variables Descriptives ===\n")
|
||||
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====
|
||||
|
||||
# Compute correlation matrix with p-values
|
||||
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_pearson <- cor_results_pearson$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_pearson <- cor_results_pearson$P
|
||||
|
||||
# Function to add significance stars
|
||||
corstars <- function(cor_mat, p_mat) {
|
||||
stars <- ifelse(p_mat < 0.001, "***",
|
||||
ifelse(p_mat < 0.01, "**",
|
||||
ifelse(p_mat < 0.05, "*", "")))
|
||||
|
||||
# Combine correlation values with stars, rounded to 5 decimal places
|
||||
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
||||
nrow = nrow(cor_mat))
|
||||
|
||||
# Set row and column names
|
||||
rownames(cor_with_stars) <- rownames(cor_mat)
|
||||
colnames(cor_with_stars) <- colnames(cor_mat)
|
||||
|
||||
return(cor_with_stars)
|
||||
}
|
||||
|
||||
# Apply the function
|
||||
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
||||
|
||||
cat("\n=== PEARSON CORRELATIONS ===\n")
|
||||
print(cor_table_pearson, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations)) {
|
||||
cor_val <- eohi_cal_correlations[i, j]
|
||||
p_val <- eohi_cal_pvalues[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations)[i],
|
||||
colnames(eohi_cal_correlations)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====
|
||||
|
||||
# Compute Spearman correlation matrix with p-values
|
||||
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_spearman <- cor_results_spearman$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_spearman <- cor_results_spearman$P
|
||||
|
||||
# Apply the function
|
||||
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
||||
|
||||
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
||||
print(cor_table_spearman, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
||||
cor_val <- eohi_cal_correlations_spearman[i, j]
|
||||
p_val <- eohi_cal_pvalues_spearman[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations_spearman)[i],
|
||||
colnames(eohi_cal_correlations_spearman)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
||||
|
||||
# Function to compute correlation with bootstrap CI
|
||||
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
||||
# Remove missing values
|
||||
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
||||
|
||||
if(nrow(complete_data) < 3) {
|
||||
return(data.frame(
|
||||
correlation = NA,
|
||||
ci_lower = NA,
|
||||
ci_upper = NA,
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Bootstrap function
|
||||
boot_fun <- function(data, indices) {
|
||||
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
||||
}
|
||||
|
||||
# Perform bootstrap
|
||||
set.seed(123) # for reproducibility
|
||||
boot_results <- boot(complete_data, boot_fun, R = R)
|
||||
|
||||
# Calculate confidence interval
|
||||
ci <- boot.ci(boot_results, type = "perc")
|
||||
|
||||
return(data.frame(
|
||||
correlation = boot_results$t0,
|
||||
ci_lower = ci$perc[4],
|
||||
ci_upper = ci$perc[5],
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
||||
bootstrap_results_pearson <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "pearson"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_pearson)
|
||||
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
||||
bootstrap_results_spearman <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "spearman"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====
|
||||
|
||||
# Create comprehensive summary table
|
||||
summary_table <- bootstrap_results_pearson %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
||||
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
||||
left_join(
|
||||
bootstrap_results_spearman %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
||||
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
# Add p-values
|
||||
left_join(
|
||||
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
||||
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
||||
data.frame(
|
||||
eohi_var = eohi_var,
|
||||
cal_var = cal_var,
|
||||
pearson_p = pearson_p,
|
||||
spearman_p = spearman_p
|
||||
)
|
||||
}),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
mutate(
|
||||
pearson_p = round(pearson_p, 5),
|
||||
spearman_p = round(spearman_p, 5)
|
||||
)
|
||||
|
||||
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
||||
print(summary_table)
|
||||
|
||||
# Save results to CSV
|
||||
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
||||
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
|
||||
####==== EFFECT SIZES (Cohen's conventions) ====
|
||||
|
||||
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
|
||||
cat("Small effect: |r| = 0.10\n")
|
||||
cat("Medium effect: |r| = 0.30\n")
|
||||
cat("Large effect: |r| = 0.50\n")
|
||||
|
||||
# Categorize effect sizes
|
||||
summary_table_with_effects <- summary_table %>%
|
||||
mutate(
|
||||
pearson_effect_size = case_when(
|
||||
abs(pearson_r) >= 0.50 ~ "Large",
|
||||
abs(pearson_r) >= 0.30 ~ "Medium",
|
||||
abs(pearson_r) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
),
|
||||
spearman_effect_size = case_when(
|
||||
abs(spearman_rho) >= 0.50 ~ "Large",
|
||||
abs(spearman_rho) >= 0.30 ~ "Medium",
|
||||
abs(spearman_rho) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
)
|
||||
)
|
||||
|
||||
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
|
||||
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))
|
||||
@ -0,0 +1,306 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Remove columns with all NA values
|
||||
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||
|
||||
# Select variables of interest
|
||||
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
||||
cal_vars <- c("cal_selfActual", "cal_global")
|
||||
|
||||
# Create dataset with selected variables
|
||||
df <- df1[, c(eohi_vars, cal_vars)]
|
||||
|
||||
# Ensure all selected variables are numeric
|
||||
df <- df %>%
|
||||
mutate(across(everything(), as.numeric))
|
||||
|
||||
# Remove rows with any missing values for correlation analysis
|
||||
df_complete <- df[complete.cases(df), ]
|
||||
|
||||
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
||||
cat("Total sample size:", nrow(df), "\n")
|
||||
|
||||
str(df)
|
||||
summary(df)
|
||||
####==== DESCRIPTIVE STATISTICS ====
|
||||
|
||||
# Function to compute descriptive statistics
|
||||
get_descriptives <- function(data, vars) {
|
||||
desc_stats <- data %>%
|
||||
select(all_of(vars)) %>%
|
||||
summarise(across(everything(), list(
|
||||
n = ~sum(!is.na(.)),
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
||||
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
||||
))) %>%
|
||||
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
||||
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
||||
pivot_wider(names_from = stat, values_from = value) %>%
|
||||
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
||||
|
||||
return(desc_stats)
|
||||
}
|
||||
|
||||
# Get descriptives for EOHI variables
|
||||
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
||||
cat("\n=== EOHI Variables Descriptives ===\n")
|
||||
print(eohi_descriptives)
|
||||
|
||||
# Get descriptives for calibration variables
|
||||
cal_descriptives <- get_descriptives(df, cal_vars)
|
||||
cat("\n=== Calibration Variables Descriptives ===\n")
|
||||
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====
|
||||
|
||||
# Compute correlation matrix with p-values
|
||||
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_pearson <- cor_results_pearson$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_pearson <- cor_results_pearson$P
|
||||
|
||||
# Function to add significance stars
|
||||
corstars <- function(cor_mat, p_mat) {
|
||||
stars <- ifelse(p_mat < 0.001, "***",
|
||||
ifelse(p_mat < 0.01, "**",
|
||||
ifelse(p_mat < 0.05, "*", "")))
|
||||
|
||||
# Combine correlation values with stars, rounded to 5 decimal places
|
||||
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
||||
nrow = nrow(cor_mat))
|
||||
|
||||
# Set row and column names
|
||||
rownames(cor_with_stars) <- rownames(cor_mat)
|
||||
colnames(cor_with_stars) <- colnames(cor_mat)
|
||||
|
||||
return(cor_with_stars)
|
||||
}
|
||||
|
||||
# Apply the function
|
||||
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
||||
|
||||
cat("\n=== PEARSON CORRELATIONS ===\n")
|
||||
print(cor_table_pearson, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations)) {
|
||||
cor_val <- eohi_cal_correlations[i, j]
|
||||
p_val <- eohi_cal_pvalues[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations)[i],
|
||||
colnames(eohi_cal_correlations)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====
|
||||
|
||||
# Compute Spearman correlation matrix with p-values
|
||||
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_spearman <- cor_results_spearman$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_spearman <- cor_results_spearman$P
|
||||
|
||||
# Apply the function
|
||||
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
||||
|
||||
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
||||
print(cor_table_spearman, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
||||
cor_val <- eohi_cal_correlations_spearman[i, j]
|
||||
p_val <- eohi_cal_pvalues_spearman[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations_spearman)[i],
|
||||
colnames(eohi_cal_correlations_spearman)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
||||
|
||||
# Function to compute correlation with bootstrap CI
|
||||
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
||||
# Remove missing values
|
||||
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
||||
|
||||
if(nrow(complete_data) < 3) {
|
||||
return(data.frame(
|
||||
correlation = NA,
|
||||
ci_lower = NA,
|
||||
ci_upper = NA,
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Bootstrap function
|
||||
boot_fun <- function(data, indices) {
|
||||
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
||||
}
|
||||
|
||||
# Perform bootstrap
|
||||
set.seed(123) # for reproducibility
|
||||
boot_results <- boot(complete_data, boot_fun, R = R)
|
||||
|
||||
# Calculate confidence interval
|
||||
ci <- boot.ci(boot_results, type = "perc")
|
||||
|
||||
return(data.frame(
|
||||
correlation = boot_results$t0,
|
||||
ci_lower = ci$perc[4],
|
||||
ci_upper = ci$perc[5],
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
||||
bootstrap_results_pearson <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "pearson"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_pearson)
|
||||
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
||||
bootstrap_results_spearman <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "spearman"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====
|
||||
|
||||
# Create comprehensive summary table
|
||||
summary_table <- bootstrap_results_pearson %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
||||
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
||||
left_join(
|
||||
bootstrap_results_spearman %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
||||
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
# Add p-values
|
||||
left_join(
|
||||
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
||||
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
||||
data.frame(
|
||||
eohi_var = eohi_var,
|
||||
cal_var = cal_var,
|
||||
pearson_p = pearson_p,
|
||||
spearman_p = spearman_p
|
||||
)
|
||||
}),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
mutate(
|
||||
pearson_p = round(pearson_p, 5),
|
||||
spearman_p = round(spearman_p, 5)
|
||||
)
|
||||
|
||||
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
||||
print(summary_table)
|
||||
|
||||
# Save results to CSV
|
||||
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
||||
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
|
||||
####==== EFFECT SIZES (Cohen's conventions) ====
|
||||
|
||||
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
|
||||
cat("Small effect: |r| = 0.10\n")
|
||||
cat("Medium effect: |r| = 0.30\n")
|
||||
cat("Large effect: |r| = 0.50\n")
|
||||
|
||||
# Categorize effect sizes
|
||||
summary_table_with_effects <- summary_table %>%
|
||||
mutate(
|
||||
pearson_effect_size = case_when(
|
||||
abs(pearson_r) >= 0.50 ~ "Large",
|
||||
abs(pearson_r) >= 0.30 ~ "Medium",
|
||||
abs(pearson_r) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
),
|
||||
spearman_effect_size = case_when(
|
||||
abs(spearman_rho) >= 0.50 ~ "Large",
|
||||
abs(spearman_rho) >= 0.30 ~ "Medium",
|
||||
abs(spearman_rho) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
)
|
||||
)
|
||||
|
||||
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
|
||||
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))
|
||||
@ -0,0 +1,307 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Remove columns with all NA values
|
||||
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||
|
||||
# Select variables of interest
|
||||
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
||||
cal_vars <- c("cal_selfActual", "cal_global")
|
||||
|
||||
# Create dataset with selected variables
|
||||
df <- df1[, c(eohi_vars, cal_vars)]
|
||||
|
||||
# Ensure all selected variables are numeric
|
||||
df <- df %>%
|
||||
mutate(across(everything(), as.numeric))
|
||||
|
||||
# Remove rows with any missing values for correlation analysis
|
||||
df_complete <- df[complete.cases(df), ]
|
||||
|
||||
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
||||
cat("Total sample size:", nrow(df), "\n")
|
||||
|
||||
str(df)
|
||||
summary(df)
|
||||
####==== DESCRIPTIVE STATISTICS ====
|
||||
|
||||
# Function to compute descriptive statistics
|
||||
get_descriptives <- function(data, vars) {
|
||||
desc_stats <- data %>%
|
||||
select(all_of(vars)) %>%
|
||||
summarise(across(everything(), list(
|
||||
n = ~sum(!is.na(.)),
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
||||
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
||||
))) %>%
|
||||
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
||||
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
||||
pivot_wider(names_from = stat, values_from = value) %>%
|
||||
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
||||
|
||||
return(desc_stats)
|
||||
}
|
||||
|
||||
# Get descriptives for EOHI variables
|
||||
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
||||
cat("\n=== EOHI Variables Descriptives ===\n")
|
||||
print(eohi_descriptives)
|
||||
|
||||
# Get descriptives for calibration variables
|
||||
cal_descriptives <- get_descriptives(df, cal_vars)
|
||||
cat("\n=== Calibration Variables Descriptives ===\n")
|
||||
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====
|
||||
|
||||
# Compute correlation matrix with p-values
|
||||
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_pearson <- cor_results_pearson$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_pearson <- cor_results_pearson$P
|
||||
|
||||
# Function to add significance stars
|
||||
corstars <- function(cor_mat, p_mat) {
|
||||
stars <- ifelse(p_mat < 0.001, "***",
|
||||
ifelse(p_mat < 0.01, "**",
|
||||
ifelse(p_mat < 0.05, "*", "")))
|
||||
|
||||
# Combine correlation values with stars, rounded to 5 decimal places
|
||||
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
||||
nrow = nrow(cor_mat))
|
||||
|
||||
# Set row and column names
|
||||
rownames(cor_with_stars) <- rownames(cor_mat)
|
||||
colnames(cor_with_stars) <- colnames(cor_mat)
|
||||
|
||||
return(cor_with_stars)
|
||||
}
|
||||
|
||||
# Apply the function
|
||||
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
||||
|
||||
cat("\n=== PEARSON CORRELATIONS ===\n")
|
||||
print(cor_table_pearson, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations)) {
|
||||
cor_val <- eohi_cal_correlations[i, j]
|
||||
p_val <- eohi_cal_pvalues[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations)[i],
|
||||
colnames(eohi_cal_correlations)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====
|
||||
|
||||
# Compute Spearman correlation matrix with p-values
|
||||
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_spearman <- cor_results_spearman$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_spearman <- cor_results_spearman$P
|
||||
|
||||
# Apply the function
|
||||
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
||||
|
||||
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
||||
print(cor_table_spearman, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
||||
cor_val <- eohi_cal_correlations_spearman[i, j]
|
||||
p_val <- eohi_cal_pvalues_spearman[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations_spearman)[i],
|
||||
colnames(eohi_cal_correlations_spearman)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
||||
|
||||
# Function to compute correlation with bootstrap CI
|
||||
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
||||
# Remove missing values
|
||||
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
||||
|
||||
if(nrow(complete_data) < 3) {
|
||||
return(data.frame(
|
||||
correlation = NA,
|
||||
ci_lower = NA,
|
||||
ci_upper = NA,
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Bootstrap function
|
||||
boot_fun <- function(data, indices) {
|
||||
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
||||
}
|
||||
|
||||
# Perform bootstrap
|
||||
set.seed(123) # for reproducibility
|
||||
boot_results <- boot(complete_data, boot_fun, R = R)
|
||||
|
||||
# Calculate confidence interval
|
||||
ci <- boot.ci(boot_results, type = "perc")
|
||||
|
||||
return(data.frame(
|
||||
correlation = boot_results$t0,
|
||||
ci_lower = ci$perc[4],
|
||||
ci_upper = ci$perc[5],
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
||||
bootstrap_results_pearson <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "pearson"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_pearson)
|
||||
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
||||
bootstrap_results_spearman <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "spearman"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====
|
||||
|
||||
# Create comprehensive summary table
|
||||
summary_table <- bootstrap_results_pearson %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
||||
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
||||
left_join(
|
||||
bootstrap_results_spearman %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
||||
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
# Add p-values
|
||||
left_join(
|
||||
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
||||
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
||||
data.frame(
|
||||
eohi_var = eohi_var,
|
||||
cal_var = cal_var,
|
||||
pearson_p = pearson_p,
|
||||
spearman_p = spearman_p
|
||||
)
|
||||
}),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
mutate(
|
||||
pearson_p = round(pearson_p, 5),
|
||||
spearman_p = round(spearman_p, 5)
|
||||
)
|
||||
|
||||
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
||||
print(summary_table)
|
||||
|
||||
# Save results to CSV
|
||||
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
||||
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
|
||||
####==== EFFECT SIZES (Cohen's conventions) ====
|
||||
|
||||
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
|
||||
cat("Small effect: |r| = 0.10\n")
|
||||
cat("Medium effect: |r| = 0.30\n")
|
||||
cat("Large effect: |r| = 0.50\n")
|
||||
|
||||
# Categorize effect sizes
|
||||
summary_table_with_effects <- summary_table %>%
|
||||
mutate(
|
||||
pearson_effect_size = case_when(
|
||||
abs(pearson_r) >= 0.50 ~ "Large",
|
||||
abs(pearson_r) >= 0.30 ~ "Medium",
|
||||
abs(pearson_r) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
),
|
||||
spearman_effect_size = case_when(
|
||||
abs(spearman_rho) >= 0.50 ~ "Large",
|
||||
abs(spearman_rho) >= 0.30 ~ "Medium",
|
||||
abs(spearman_rho) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
)
|
||||
)
|
||||
|
||||
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
|
||||
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))
|
||||
@ -0,0 +1,307 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Remove columns with all NA values
|
||||
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||
|
||||
# Select variables of interest
|
||||
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
||||
cal_vars <- c("cal_selfActual", "cal_global")
|
||||
|
||||
# Create dataset with selected variables
|
||||
df <- df1[, c(eohi_vars, cal_vars)]
|
||||
|
||||
# Ensure all selected variables are numeric
|
||||
df <- df %>%
|
||||
mutate(across(everything(), as.numeric))
|
||||
|
||||
# Remove rows with any missing values for correlation analysis
|
||||
df_complete <- df[complete.cases(df), ]
|
||||
|
||||
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
||||
cat("Total sample size:", nrow(df), "\n")
|
||||
|
||||
str(df)
|
||||
summary(df)
|
||||
####==== DESCRIPTIVE STATISTICS ====
|
||||
|
||||
# Function to compute descriptive statistics
|
||||
get_descriptives <- function(data, vars) {
|
||||
desc_stats <- data %>%
|
||||
select(all_of(vars)) %>%
|
||||
summarise(across(everything(), list(
|
||||
n = ~sum(!is.na(.)),
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
||||
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
||||
))) %>%
|
||||
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
||||
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
||||
pivot_wider(names_from = stat, values_from = value) %>%
|
||||
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
||||
|
||||
return(desc_stats)
|
||||
}
|
||||
|
||||
# Get descriptives for EOHI variables
|
||||
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
||||
cat("\n=== EOHI Variables Descriptives ===\n")
|
||||
print(eohi_descriptives)
|
||||
|
||||
# Get descriptives for calibration variables
|
||||
cal_descriptives <- get_descriptives(df, cal_vars)
|
||||
cat("\n=== Calibration Variables Descriptives ===\n")
|
||||
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====
|
||||
|
||||
# Compute correlation matrix with p-values
|
||||
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_pearson <- cor_results_pearson$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_pearson <- cor_results_pearson$P
|
||||
|
||||
# Function to add significance stars
|
||||
corstars <- function(cor_mat, p_mat) {
|
||||
stars <- ifelse(p_mat < 0.001, "***",
|
||||
ifelse(p_mat < 0.01, "**",
|
||||
ifelse(p_mat < 0.05, "*", "")))
|
||||
|
||||
# Combine correlation values with stars, rounded to 5 decimal places
|
||||
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
||||
nrow = nrow(cor_mat))
|
||||
|
||||
# Set row and column names
|
||||
rownames(cor_with_stars) <- rownames(cor_mat)
|
||||
colnames(cor_with_stars) <- colnames(cor_mat)
|
||||
|
||||
return(cor_with_stars)
|
||||
}
|
||||
|
||||
# Apply the function
|
||||
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
||||
|
||||
cat("\n=== PEARSON CORRELATIONS ===\n")
|
||||
print(cor_table_pearson, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations)) {
|
||||
cor_val <- eohi_cal_correlations[i, j]
|
||||
p_val <- eohi_cal_pvalues[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations)[i],
|
||||
colnames(eohi_cal_correlations)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====
|
||||
|
||||
# Compute Spearman correlation matrix with p-values
|
||||
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_spearman <- cor_results_spearman$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_spearman <- cor_results_spearman$P
|
||||
|
||||
# Apply the function
|
||||
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
||||
|
||||
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
||||
print(cor_table_spearman, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
||||
cor_val <- eohi_cal_correlations_spearman[i, j]
|
||||
p_val <- eohi_cal_pvalues_spearman[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations_spearman)[i],
|
||||
colnames(eohi_cal_correlations_spearman)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
||||
|
||||
# Function to compute correlation with bootstrap CI
|
||||
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
||||
# Remove missing values
|
||||
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
||||
|
||||
if(nrow(complete_data) < 3) {
|
||||
return(data.frame(
|
||||
correlation = NA,
|
||||
ci_lower = NA,
|
||||
ci_upper = NA,
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Bootstrap function
|
||||
boot_fun <- function(data, indices) {
|
||||
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
||||
}
|
||||
|
||||
# Perform bootstrap
|
||||
set.seed(123) # for reproducibility
|
||||
boot_results <- boot(complete_data, boot_fun, R = R)
|
||||
|
||||
# Calculate confidence interval
|
||||
ci <- boot.ci(boot_results, type = "perc")
|
||||
|
||||
return(data.frame(
|
||||
correlation = boot_results$t0,
|
||||
ci_lower = ci$perc[4],
|
||||
ci_upper = ci$perc[5],
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
||||
bootstrap_results_pearson <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "pearson"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_pearson)
|
||||
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
||||
bootstrap_results_spearman <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "spearman"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====
|
||||
|
||||
# Create comprehensive summary table
|
||||
summary_table <- bootstrap_results_pearson %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
||||
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
||||
left_join(
|
||||
bootstrap_results_spearman %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
||||
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
# Add p-values
|
||||
left_join(
|
||||
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
||||
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
||||
data.frame(
|
||||
eohi_var = eohi_var,
|
||||
cal_var = cal_var,
|
||||
pearson_p = pearson_p,
|
||||
spearman_p = spearman_p
|
||||
)
|
||||
}),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
mutate(
|
||||
pearson_p = round(pearson_p, 5),
|
||||
spearman_p = round(spearman_p, 5)
|
||||
)
|
||||
|
||||
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
||||
print(summary_table)
|
||||
|
||||
# Save results to CSV
|
||||
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
||||
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
|
||||
####==== EFFECT SIZES (Cohen's conventions) ====
|
||||
|
||||
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
|
||||
cat("Small effect: |r| = 0.10\n")
|
||||
cat("Medium effect: |r| = 0.30\n")
|
||||
cat("Large effect: |r| = 0.50\n")
|
||||
|
||||
# Categorize effect sizes
|
||||
summary_table_with_effects <- summary_table %>%
|
||||
mutate(
|
||||
pearson_effect_size = case_when(
|
||||
abs(pearson_r) >= 0.50 ~ "Large",
|
||||
abs(pearson_r) >= 0.30 ~ "Medium",
|
||||
abs(pearson_r) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
),
|
||||
spearman_effect_size = case_when(
|
||||
abs(spearman_rho) >= 0.50 ~ "Large",
|
||||
abs(spearman_rho) >= 0.30 ~ "Medium",
|
||||
abs(spearman_rho) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
)
|
||||
)
|
||||
|
||||
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
|
||||
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))
|
||||
@ -0,0 +1,307 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Remove columns with all NA values
|
||||
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||
|
||||
# Select variables of interest
|
||||
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
||||
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
|
||||
|
||||
# Create dataset with selected variables
|
||||
df <- df1[, c(eohi_vars, cal_vars)]
|
||||
|
||||
# Ensure all selected variables are numeric
|
||||
df <- df %>%
|
||||
mutate(across(everything(), as.numeric))
|
||||
|
||||
# Remove rows with any missing values for correlation analysis
|
||||
df_complete <- df[complete.cases(df), ]
|
||||
|
||||
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
||||
cat("Total sample size:", nrow(df), "\n")
|
||||
|
||||
str(df)
|
||||
summary(df)
|
||||
####==== DESCRIPTIVE STATISTICS ====
|
||||
|
||||
# Function to compute descriptive statistics
|
||||
get_descriptives <- function(data, vars) {
|
||||
desc_stats <- data %>%
|
||||
select(all_of(vars)) %>%
|
||||
summarise(across(everything(), list(
|
||||
n = ~sum(!is.na(.)),
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
||||
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
||||
))) %>%
|
||||
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
||||
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
||||
pivot_wider(names_from = stat, values_from = value) %>%
|
||||
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
||||
|
||||
return(desc_stats)
|
||||
}
|
||||
|
||||
# Get descriptives for EOHI variables
|
||||
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
||||
cat("\n=== EOHI Variables Descriptives ===\n")
|
||||
print(eohi_descriptives)
|
||||
|
||||
# Get descriptives for calibration variables
|
||||
cal_descriptives <- get_descriptives(df, cal_vars)
|
||||
cat("\n=== Calibration Variables Descriptives ===\n")
|
||||
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====
|
||||
|
||||
# Compute correlation matrix with p-values
|
||||
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_pearson <- cor_results_pearson$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_pearson <- cor_results_pearson$P
|
||||
|
||||
# Function to add significance stars
|
||||
corstars <- function(cor_mat, p_mat) {
|
||||
stars <- ifelse(p_mat < 0.001, "***",
|
||||
ifelse(p_mat < 0.01, "**",
|
||||
ifelse(p_mat < 0.05, "*", "")))
|
||||
|
||||
# Combine correlation values with stars, rounded to 5 decimal places
|
||||
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
||||
nrow = nrow(cor_mat))
|
||||
|
||||
# Set row and column names
|
||||
rownames(cor_with_stars) <- rownames(cor_mat)
|
||||
colnames(cor_with_stars) <- colnames(cor_mat)
|
||||
|
||||
return(cor_with_stars)
|
||||
}
|
||||
|
||||
# Apply the function
|
||||
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
||||
|
||||
cat("\n=== PEARSON CORRELATIONS ===\n")
|
||||
print(cor_table_pearson, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations)) {
|
||||
cor_val <- eohi_cal_correlations[i, j]
|
||||
p_val <- eohi_cal_pvalues[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations)[i],
|
||||
colnames(eohi_cal_correlations)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====
|
||||
|
||||
# Compute Spearman correlation matrix with p-values
|
||||
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_spearman <- cor_results_spearman$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_spearman <- cor_results_spearman$P
|
||||
|
||||
# Apply the function
|
||||
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
||||
|
||||
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
||||
print(cor_table_spearman, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
||||
cor_val <- eohi_cal_correlations_spearman[i, j]
|
||||
p_val <- eohi_cal_pvalues_spearman[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations_spearman)[i],
|
||||
colnames(eohi_cal_correlations_spearman)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
||||
|
||||
# Function to compute correlation with bootstrap CI
|
||||
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
||||
# Remove missing values
|
||||
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
||||
|
||||
if(nrow(complete_data) < 3) {
|
||||
return(data.frame(
|
||||
correlation = NA,
|
||||
ci_lower = NA,
|
||||
ci_upper = NA,
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Bootstrap function
|
||||
boot_fun <- function(data, indices) {
|
||||
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
||||
}
|
||||
|
||||
# Perform bootstrap
|
||||
set.seed(123) # for reproducibility
|
||||
boot_results <- boot(complete_data, boot_fun, R = R)
|
||||
|
||||
# Calculate confidence interval
|
||||
ci <- boot.ci(boot_results, type = "perc")
|
||||
|
||||
return(data.frame(
|
||||
correlation = boot_results$t0,
|
||||
ci_lower = ci$perc[4],
|
||||
ci_upper = ci$perc[5],
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
||||
bootstrap_results_pearson <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "pearson"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_pearson)
|
||||
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
||||
bootstrap_results_spearman <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "spearman"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====
|
||||
|
||||
# Create comprehensive summary table
|
||||
summary_table <- bootstrap_results_pearson %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
||||
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
||||
left_join(
|
||||
bootstrap_results_spearman %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
||||
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
# Add p-values
|
||||
left_join(
|
||||
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
||||
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
||||
data.frame(
|
||||
eohi_var = eohi_var,
|
||||
cal_var = cal_var,
|
||||
pearson_p = pearson_p,
|
||||
spearman_p = spearman_p
|
||||
)
|
||||
}),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
mutate(
|
||||
pearson_p = round(pearson_p, 5),
|
||||
spearman_p = round(spearman_p, 5)
|
||||
)
|
||||
|
||||
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
||||
print(summary_table)
|
||||
|
||||
# Save results to CSV
|
||||
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
||||
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
|
||||
####==== EFFECT SIZES (Cohen's conventions) ====
|
||||
|
||||
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
|
||||
cat("Small effect: |r| = 0.10\n")
|
||||
cat("Medium effect: |r| = 0.30\n")
|
||||
cat("Large effect: |r| = 0.50\n")
|
||||
|
||||
# Categorize effect sizes
|
||||
summary_table_with_effects <- summary_table %>%
|
||||
mutate(
|
||||
pearson_effect_size = case_when(
|
||||
abs(pearson_r) >= 0.50 ~ "Large",
|
||||
abs(pearson_r) >= 0.30 ~ "Medium",
|
||||
abs(pearson_r) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
),
|
||||
spearman_effect_size = case_when(
|
||||
abs(spearman_rho) >= 0.50 ~ "Large",
|
||||
abs(spearman_rho) >= 0.30 ~ "Medium",
|
||||
abs(spearman_rho) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
)
|
||||
)
|
||||
|
||||
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
|
||||
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))
|
||||
@ -0,0 +1,307 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
options(scipen = 999)
|
||||
|
||||
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
|
||||
# Load data
|
||||
df1 <- read.csv("exp1.csv")
|
||||
|
||||
# Remove columns with all NA values
|
||||
df1 <- df1 %>% select(where(~ !all(is.na(.))))
|
||||
|
||||
# Select variables of interest
|
||||
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
|
||||
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
|
||||
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
|
||||
|
||||
# Create dataset with selected variables
|
||||
df <- df1[, c(eohi_vars, cal_vars)]
|
||||
|
||||
# Ensure all selected variables are numeric
|
||||
df <- df %>%
|
||||
mutate(across(everything(), as.numeric))
|
||||
|
||||
# Remove rows with any missing values for correlation analysis
|
||||
df_complete <- df[complete.cases(df), ]
|
||||
|
||||
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
|
||||
cat("Total sample size:", nrow(df), "\n")
|
||||
|
||||
str(df)
|
||||
summary(df)
|
||||
####==== DESCRIPTIVE STATISTICS ====
|
||||
|
||||
# Function to compute descriptive statistics
|
||||
get_descriptives <- function(data, vars) {
|
||||
desc_stats <- data %>%
|
||||
select(all_of(vars)) %>%
|
||||
summarise(across(everything(), list(
|
||||
n = ~sum(!is.na(.)),
|
||||
mean = ~mean(., na.rm = TRUE),
|
||||
sd = ~sd(., na.rm = TRUE),
|
||||
min = ~min(., na.rm = TRUE),
|
||||
max = ~max(., na.rm = TRUE),
|
||||
median = ~median(., na.rm = TRUE),
|
||||
q25 = ~quantile(., 0.25, na.rm = TRUE),
|
||||
q75 = ~quantile(., 0.75, na.rm = TRUE)
|
||||
))) %>%
|
||||
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
|
||||
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
|
||||
pivot_wider(names_from = stat, values_from = value) %>%
|
||||
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
|
||||
|
||||
return(desc_stats)
|
||||
}
|
||||
|
||||
# Get descriptives for EOHI variables
|
||||
eohi_descriptives <- get_descriptives(df, eohi_vars)
|
||||
cat("\n=== EOHI Variables Descriptives ===\n")
|
||||
print(eohi_descriptives)
|
||||
|
||||
# Get descriptives for calibration variables
|
||||
cal_descriptives <- get_descriptives(df, cal_vars)
|
||||
cat("\n=== Calibration Variables Descriptives ===\n")
|
||||
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====
|
||||
|
||||
# Compute correlation matrix with p-values
|
||||
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_pearson <- cor_results_pearson$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_pearson <- cor_results_pearson$P
|
||||
|
||||
# Function to add significance stars
|
||||
corstars <- function(cor_mat, p_mat) {
|
||||
stars <- ifelse(p_mat < 0.001, "***",
|
||||
ifelse(p_mat < 0.01, "**",
|
||||
ifelse(p_mat < 0.05, "*", "")))
|
||||
|
||||
# Combine correlation values with stars, rounded to 5 decimal places
|
||||
cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
|
||||
nrow = nrow(cor_mat))
|
||||
|
||||
# Set row and column names
|
||||
rownames(cor_with_stars) <- rownames(cor_mat)
|
||||
colnames(cor_with_stars) <- colnames(cor_mat)
|
||||
|
||||
return(cor_with_stars)
|
||||
}
|
||||
|
||||
# Apply the function
|
||||
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
|
||||
|
||||
cat("\n=== PEARSON CORRELATIONS ===\n")
|
||||
print(cor_table_pearson, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations)) {
|
||||
cor_val <- eohi_cal_correlations[i, j]
|
||||
p_val <- eohi_cal_pvalues[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations)[i],
|
||||
colnames(eohi_cal_correlations)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====
|
||||
|
||||
# Compute Spearman correlation matrix with p-values
|
||||
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
|
||||
|
||||
# Extract correlation coefficients
|
||||
cor_spearman <- cor_results_spearman$r
|
||||
|
||||
# Extract p-values
|
||||
p_matrix_spearman <- cor_results_spearman$P
|
||||
|
||||
# Apply the function
|
||||
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
|
||||
|
||||
cat("\n=== SPEARMAN CORRELATIONS ===\n")
|
||||
print(cor_table_spearman, quote = FALSE)
|
||||
|
||||
# Extract specific correlations between EOHI and calibration variables
|
||||
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
|
||||
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
|
||||
|
||||
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
|
||||
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
|
||||
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
|
||||
cor_val <- eohi_cal_correlations_spearman[i, j]
|
||||
p_val <- eohi_cal_pvalues_spearman[i, j]
|
||||
star <- ifelse(p_val < 0.001, "***",
|
||||
ifelse(p_val < 0.01, "**",
|
||||
ifelse(p_val < 0.05, "*", "")))
|
||||
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
|
||||
rownames(eohi_cal_correlations_spearman)[i],
|
||||
colnames(eohi_cal_correlations_spearman)[j],
|
||||
cor_val, star, p_val))
|
||||
}
|
||||
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
|
||||
|
||||
# Function to compute correlation with bootstrap CI
|
||||
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
|
||||
# Remove missing values
|
||||
complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
|
||||
|
||||
if(nrow(complete_data) < 3) {
|
||||
return(data.frame(
|
||||
correlation = NA,
|
||||
ci_lower = NA,
|
||||
ci_upper = NA,
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Bootstrap function
|
||||
boot_fun <- function(data, indices) {
|
||||
cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
|
||||
}
|
||||
|
||||
# Perform bootstrap
|
||||
set.seed(123) # for reproducibility
|
||||
boot_results <- boot(complete_data, boot_fun, R = R)
|
||||
|
||||
# Calculate confidence interval
|
||||
ci <- boot.ci(boot_results, type = "perc")
|
||||
|
||||
return(data.frame(
|
||||
correlation = boot_results$t0,
|
||||
ci_lower = ci$perc[4],
|
||||
ci_upper = ci$perc[5],
|
||||
n = nrow(complete_data)
|
||||
))
|
||||
}
|
||||
|
||||
# Compute bootstrap CIs for all EOHI x Calibration correlations
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
|
||||
bootstrap_results_pearson <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "pearson"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_pearson)
|
||||
|
||||
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
|
||||
bootstrap_results_spearman <- expand.grid(
|
||||
eohi_var = eohi_vars,
|
||||
cal_var = cal_vars,
|
||||
stringsAsFactors = FALSE
|
||||
) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
|
||||
result$eohi_var <- eohi_var
|
||||
result$cal_var <- cal_var
|
||||
result$method <- "spearman"
|
||||
return(result)
|
||||
}) %>%
|
||||
mutate(
|
||||
correlation = round(correlation, 5),
|
||||
ci_lower = round(ci_lower, 5),
|
||||
ci_upper = round(ci_upper, 5)
|
||||
)
|
||||
|
||||
print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====
|
||||
|
||||
# Create comprehensive summary table
|
||||
summary_table <- bootstrap_results_pearson %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
|
||||
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
|
||||
left_join(
|
||||
bootstrap_results_spearman %>%
|
||||
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
|
||||
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
# Add p-values
|
||||
left_join(
|
||||
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
|
||||
pmap_dfr(function(eohi_var, cal_var) {
|
||||
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
|
||||
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
|
||||
data.frame(
|
||||
eohi_var = eohi_var,
|
||||
cal_var = cal_var,
|
||||
pearson_p = pearson_p,
|
||||
spearman_p = spearman_p
|
||||
)
|
||||
}),
|
||||
by = c("eohi_var", "cal_var")
|
||||
) %>%
|
||||
mutate(
|
||||
pearson_p = round(pearson_p, 5),
|
||||
spearman_p = round(spearman_p, 5)
|
||||
)
|
||||
|
||||
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
|
||||
print(summary_table)
|
||||
|
||||
# Save results to CSV
|
||||
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
|
||||
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
|
||||
####==== EFFECT SIZES (Cohen's conventions) ====
|
||||
|
||||
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
|
||||
cat("Small effect: |r| = 0.10\n")
|
||||
cat("Medium effect: |r| = 0.30\n")
|
||||
cat("Large effect: |r| = 0.50\n")
|
||||
|
||||
# Categorize effect sizes
|
||||
summary_table_with_effects <- summary_table %>%
|
||||
mutate(
|
||||
pearson_effect_size = case_when(
|
||||
abs(pearson_r) >= 0.50 ~ "Large",
|
||||
abs(pearson_r) >= 0.30 ~ "Medium",
|
||||
abs(pearson_r) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
),
|
||||
spearman_effect_size = case_when(
|
||||
abs(spearman_rho) >= 0.50 ~ "Large",
|
||||
abs(spearman_rho) >= 0.30 ~ "Medium",
|
||||
abs(spearman_rho) >= 0.10 ~ "Small",
|
||||
TRUE ~ "Negligible"
|
||||
)
|
||||
)
|
||||
|
||||
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
|
||||
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))
|
||||
@ -0,0 +1,280 @@
|
||||
# Load required libraries
|
||||
library(Hmisc)
|
||||
library(knitr)
|
||||
library(dplyr)
|
||||
library(corrr)
|
||||
library(broom)
|
||||
library(purrr)
|
||||
library(tidyr)
|
||||
library(tibble)
|
||||
library(boot)
|
||||
|
||||
# Fixed (non-scientific) number printing throughout the script.
options(scipen = 999)

# Working directory: the input CSV lives here.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load data
df1 <- read.csv("exp1.csv")

# Drop columns that contain nothing but NA.
df1 <- df1 %>% select(where(~ !all(is.na(.))))

# Variables of interest: EOHI scores and calibration measures.
eohi_vars <- c(
  "eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean"
)
cal_vars <- c(
  "cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75",
  "cal_true", "cal_false"
)

# Analysis dataset restricted to the selected columns, coerced to numeric.
df <- df1[, c(eohi_vars, cal_vars)]
df <- df %>%
  mutate(across(everything(), as.numeric))

# Listwise-complete subset used by the correlation matrices.
df_complete <- df[complete.cases(df), ]

cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")

str(df)
summary(df)
|
||||
####==== DESCRIPTIVE STATISTICS ====

# Per-variable descriptive statistics: one row per variable with
# n, mean, sd, min, max, median, and the 25th/75th percentiles,
# all summary values rounded to 5 decimal places.
get_descriptives <- function(data, vars) {
  stat_funs <- list(
    n      = ~ sum(!is.na(.)),
    mean   = ~ mean(., na.rm = TRUE),
    sd     = ~ sd(., na.rm = TRUE),
    min    = ~ min(., na.rm = TRUE),
    max    = ~ max(., na.rm = TRUE),
    median = ~ median(., na.rm = TRUE),
    q25    = ~ quantile(., 0.25, na.rm = TRUE),
    q75    = ~ quantile(., 0.75, na.rm = TRUE)
  )

  data %>%
    select(all_of(vars)) %>%
    summarise(across(everything(), stat_funs)) %>%
    # Summarise names look like "<var>_<stat>"; split on the LAST
    # underscore because the variable names themselves contain "_".
    pivot_longer(
      everything(),
      names_to = c("var", "stat"),
      names_pattern = "(.*)_([^_]+)$"
    ) %>%
    pivot_wider(names_from = stat, values_from = value) %>%
    mutate(across(c(mean, sd, min, max, median, q25, q75), ~ round(., 5)))
}

# Descriptives for EOHI variables.
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)

# Descriptives for calibration variables.
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
|
||||
|
||||
####==== PEARSON CORRELATIONS ====

# Pearson correlation matrix with p-values (Hmisc::rcorr).
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
cor_pearson <- cor_results_pearson$r       # correlation coefficients
p_matrix_pearson <- cor_results_pearson$P  # p-values (NA on the diagonal)
|
||||
|
||||
# Format a correlation matrix as strings with significance stars taken
# from a matching matrix of p-values:
#   *** p < .001, ** p < .01, * p < .05
# NA p-values (e.g. the diagonal returned by Hmisc::rcorr) get no star.
# The original code fed NA through `ifelse(p_mat < 0.001, ...)`, which
# yields NA and made paste0() append a literal "NA" (e.g. "1.00000NA").
corstars <- function(cor_mat, p_mat) {
  stars <- ifelse(is.na(p_mat), "",
           ifelse(p_mat < 0.001, "***",
           ifelse(p_mat < 0.01, "**",
           ifelse(p_mat < 0.05, "*", ""))))

  # Correlations rounded and padded to 5 decimal places, star appended
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))

  # Carry over the variable labels
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)

  return(cor_with_stars)
}
|
||||
|
||||
# Starred Pearson correlation table.
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)

cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)

# EOHI x calibration sub-matrices.
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]

# One line per pair: coefficient, significance star, p-value.
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for (row_name in rownames(eohi_cal_correlations)) {
  for (col_name in colnames(eohi_cal_correlations)) {
    r_val <- eohi_cal_correlations[row_name, col_name]
    p_val <- eohi_cal_pvalues[row_name, col_name]
    star <- ifelse(p_val < 0.001, "***",
            ifelse(p_val < 0.01, "**",
            ifelse(p_val < 0.05, "*", "")))
    cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
                row_name, col_name, r_val, star, p_val))
  }
}
|
||||
|
||||
####==== SPEARMAN CORRELATIONS ====

# Spearman correlation matrix with p-values (Hmisc::rcorr).
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
cor_spearman <- cor_results_spearman$r       # correlation coefficients
p_matrix_spearman <- cor_results_spearman$P  # p-values (NA on the diagonal)

# Starred Spearman correlation table.
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)

cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)

# EOHI x calibration sub-matrices.
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]

# One line per pair: coefficient, significance star, p-value.
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for (row_name in rownames(eohi_cal_correlations_spearman)) {
  for (col_name in colnames(eohi_cal_correlations_spearman)) {
    rho_val <- eohi_cal_correlations_spearman[row_name, col_name]
    p_val <- eohi_cal_pvalues_spearman[row_name, col_name]
    star <- ifelse(p_val < 0.001, "***",
            ifelse(p_val < 0.01, "**",
            ifelse(p_val < 0.05, "*", "")))
    cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
                row_name, col_name, rho_val, star, p_val))
  }
}
|
||||
|
||||
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====

# Correlation between two columns of `data` with a bootstrap percentile
# 95% confidence interval.
#
# Args:
#   data   - data.frame containing the two columns
#   var1   - first column name (string)
#   var2   - second column name (string)
#   method - "pearson" or "spearman", passed through to cor()
#   R      - number of bootstrap replicates
#
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
# CI bounds are NA when fewer than 3 complete pairs exist, or when the
# bootstrap distribution is degenerate (boot.ci() returns NULL with a
# warning when all replicates are identical, e.g. perfectly correlated
# columns) -- the original `ci$perc[4]` crashed in that case, and also
# relied on R's partial matching of the bootci element `percent`.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Pairwise-complete cases only.
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]

  if (nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }

  # Statistic to bootstrap: correlation over the resampled rows.
  boot_fun <- function(d, indices) {
    cor(d[indices, var1], d[indices, var2], method = method, use = "complete.obs")
  }

  # NOTE: resets the global RNG on every call; kept so the script's
  # published output stays reproducible.
  set.seed(123)
  boot_results <- boot(complete_data, boot_fun, R = R)

  # Percentile CI; guard against a degenerate bootstrap distribution.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if (is.null(ci) || is.null(ci$percent)) {
    lower <- NA
    upper <- NA
  } else {
    # bootci "percent" rows are (conf, k1, k2, lower, upper).
    lower <- ci$percent[4]
    upper <- ci$percent[5]
  }

  data.frame(
    correlation = boot_results$t0,
    ci_lower = lower,
    ci_upper = upper,
    n = nrow(complete_data)
  )
}
|
||||
|
||||
# Bootstrap CIs for every EOHI x calibration pair (Pearson).
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
pair_grid <- expand.grid(
  eohi_var = eohi_vars,
  cal_var = cal_vars,
  stringsAsFactors = FALSE
)
bootstrap_results_pearson <- pair_grid %>%
  pmap_dfr(function(eohi_var, cal_var) {
    out <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
    out$eohi_var <- eohi_var
    out$cal_var <- cal_var
    out$method <- "pearson"
    out
  }) %>%
  mutate(across(c(correlation, ci_lower, ci_upper), ~ round(., 5)))

print(bootstrap_results_pearson)
|
||||
|
||||
# Bootstrap CIs for every EOHI x calibration pair (Spearman).
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
  eohi_var = eohi_vars,
  cal_var = cal_vars,
  stringsAsFactors = FALSE
) %>%
  pmap_dfr(function(eohi_var, cal_var) {
    out <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
    out$eohi_var <- eohi_var
    out$cal_var <- cal_var
    out$method <- "spearman"
    out
  }) %>%
  mutate(across(c(correlation, ci_lower, ci_upper), ~ round(., 5)))

print(bootstrap_results_spearman)
|
||||
|
||||
####==== SUMMARY TABLE ====

# One row per EOHI x calibration pair: Pearson r + bootstrap CI,
# Spearman rho + bootstrap CI, plus the rcorr p-values for both.
pearson_part <- bootstrap_results_pearson %>%
  select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
  rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper)

spearman_part <- bootstrap_results_spearman %>%
  select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
  rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper)

# p-values pulled out of the rcorr matrices by variable name.
pvalue_part <- expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
  pmap_dfr(function(eohi_var, cal_var) {
    data.frame(
      eohi_var = eohi_var,
      cal_var = cal_var,
      pearson_p = p_matrix_pearson[eohi_var, cal_var],
      spearman_p = p_matrix_spearman[eohi_var, cal_var]
    )
  })

summary_table <- pearson_part %>%
  left_join(spearman_part, by = c("eohi_var", "cal_var")) %>%
  left_join(pvalue_part, by = c("eohi_var", "cal_var")) %>%
  mutate(
    pearson_p = round(pearson_p, 5),
    spearman_p = round(spearman_p, 5)
  )

cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)

# Persist the full summary for reporting.
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user