Initial commit

This commit is contained in:
Irina Levit 2025-12-23 15:47:09 -05:00
commit f8eb3da04d
1543 changed files with 573824 additions and 0 deletions

View File

@ -0,0 +1,118 @@
Option Explicit
' Returns the 1-based column number whose header-row (row 1) text matches
' headerName, or 0 when no such header exists on ws.
Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
    Dim headerRange As Range
    Dim hit As Variant

    ' Search only the used portion of row 1.
    Set headerRange = ws.Range(ws.Cells(1, 1), _
                               ws.Cells(1, ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column))

    ' Application.Match returns an error Variant (not a runtime error) on no match.
    hit = Application.Match(headerName, headerRange, 0)

    If IsError(hit) Then
        GetColIndex = 0
    Else
        GetColIndex = CLng(hit)
    End If
End Function
' Maps a list of header names to their column numbers on ws, keeping only
' headers that actually exist (GetColIndex > 0).
' Returns a 0-based Long array of column numbers, or an empty Variant array
' (Array()) when no header matches or the input list is empty.
Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
    ' Guard: an empty input array would make the ReDim below fail with
    ' error 9 (upper bound below lower bound) in the original version.
    If UBound(headers) < LBound(headers) Then
        BuildPresentColArray = Array()
        Exit Function
    End If

    ' Scratch buffer sized for the worst case (every header present).
    ' Sized from the span LBound..UBound so non-zero-based arrays also work.
    Dim tmp() As Long
    ReDim tmp(0 To UBound(headers) - LBound(headers))

    Dim i As Long, c As Long
    c = 0
    For i = LBound(headers) To UBound(headers)
        Dim colIdx As Long
        colIdx = GetColIndex(CStr(headers(i)), ws)
        If colIdx > 0 Then
            tmp(c) = colIdx
            c = c + 1
        End If
    Next i

    If c = 0 Then
        BuildPresentColArray = Array()
    Else
        ' Shrink to the number of matches actually found.
        Dim outArr() As Long
        ReDim outArr(0 To c - 1)
        For i = 0 To c - 1
            outArr(i) = tmp(i)
        Next i
        BuildPresentColArray = outArr
    End If
End Function
' Average of the numeric cells in row rowIndex across the given column
' numbers; returns #N/A when no numeric value is found (or colIndexes is
' not an array / is empty).
' NOTE(review): empty cells read back as Empty, which passes IsNumeric and
' is averaged in as 0 — confirm that blanks-as-zero is intended.
Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
    Dim total As Double
    Dim n As Long
    Dim k As Long
    Dim cellVal As Variant

    total = 0
    n = 0

    If IsArray(colIndexes) Then
        For k = LBound(colIndexes) To UBound(colIndexes)
            cellVal = ws.Cells(rowIndex, CLng(colIndexes(k))).Value
            ' Skip error cells (#N/A etc.) and anything non-numeric.
            If Not IsError(cellVal) Then
                If IsNumeric(cellVal) Then
                    total = total + CDbl(cellVal)
                    n = n + 1
                End If
            End If
        Next k
    End If

    If n = 0 Then
        MeanOfRow = CVErr(xlErrNA)
    Else
        MeanOfRow = total / n
    End If
End Function
' Returns the column number of headerName in row 1 of ws, creating the
' header in the first unused column when it does not exist yet.
Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
    Dim col As Long
    col = GetColIndex(headerName, ws)
    If col = 0 Then
        ' Append a new header immediately right of the last used header cell.
        col = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column + 1
        ws.Cells(1, col).Value = headerName
    End If
    EnsureOutputColumn = col
End Function
' Writes three per-row mean scores to the first worksheet of this workbook:
'   bs_28   - mean over all 28 belief-statement item columns
'   bs_easy - mean over the 14 "easy" item columns
'   bs_hard - mean over the 14 "hard" item columns
' Row 1 is the header row; data rows start at row 2. Item headers missing
' from the sheet are simply skipped (see BuildPresentColArray); rows with no
' numeric item values get #N/A (see MeanOfRow).
Sub BS_Means()
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)

    Dim all28 As Variant
    all28 = Array( _
    "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
    "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
    )
    Dim easy14 As Variant
    easy14 = Array( _
    "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _
    )
    Dim hard14 As Variant
    hard14 = Array( _
    "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
    )

    ' Resolve which item columns actually exist on this sheet.
    Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
    colsAll = BuildPresentColArray(all28, ws)
    colsEasy = BuildPresentColArray(easy14, ws)
    colsHard = BuildPresentColArray(hard14, ws)

    ' Create (or reuse) the three output columns.
    Dim colBS28 As Long, colBSEasy As Long, colBSHard As Long
    colBS28 = EnsureOutputColumn(ws, "bs_28")
    colBSEasy = EnsureOutputColumn(ws, "bs_easy")
    colBSHard = EnsureOutputColumn(ws, "bs_hard")

    ' Last data row taken from column A; bail out early on an empty sheet.
    Dim lastRow As Long
    lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    If lastRow < 2 Then Exit Sub

    ' Suspend screen redraw while writing cell-by-cell, and make sure the
    ' previous setting is restored even if a write fails.
    Dim prevScreenUpdating As Boolean
    prevScreenUpdating = Application.ScreenUpdating
    Application.ScreenUpdating = False
    On Error GoTo CleanFail

    Dim r As Long
    For r = 2 To lastRow
        ws.Cells(r, colBS28).Value = MeanOfRow(ws, r, colsAll)
        ws.Cells(r, colBSEasy).Value = MeanOfRow(ws, r, colsEasy)
        ws.Cells(r, colBSHard).Value = MeanOfRow(ws, r, colsHard)
    Next r

    Application.ScreenUpdating = prevScreenUpdating
    Exit Sub

CleanFail:
    Application.ScreenUpdating = prevScreenUpdating
    Err.Raise Err.Number, Err.Source, Err.Description
End Sub

View File

@ -0,0 +1,118 @@
Option Explicit
' NOTE(review): this module is a byte-for-byte duplicate of the BS_Means
' module committed earlier in this changeset — consider keeping one copy.

' Returns the 1-based column number whose row-1 header equals headerName,
' or 0 when the header is not present on ws.
Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
Dim lastCol As Long
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
Dim m As Variant
' Application.Match returns an error Variant (not a runtime error) on no match.
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
If IsError(m) Then
GetColIndex = 0
Else
GetColIndex = CLng(m)
End If
End Function

' Maps header names to their column numbers on ws, keeping only the headers
' that exist. Returns a 0-based Long array, or an empty Variant array when
' none of the headers are found.
' NOTE(review): assumes headers is a non-empty 0-based array (as built by
' Array(...)); an empty input would make the first ReDim fail.
Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
Dim tmp() As Long
ReDim tmp(0 To UBound(headers))
Dim i As Long, c As Long
c = 0
For i = LBound(headers) To UBound(headers)
Dim colIdx As Long
colIdx = GetColIndex(CStr(headers(i)), ws)
If colIdx > 0 Then
tmp(c) = colIdx
c = c + 1
End If
Next i
If c = 0 Then
BuildPresentColArray = Array()
Else
' Shrink to the number of matches actually found.
Dim outArr() As Long
ReDim outArr(0 To c - 1)
For i = 0 To c - 1
outArr(i) = tmp(i)
Next i
BuildPresentColArray = outArr
End If
End Function

' Mean of the numeric cells in row rowIndex across colIndexes; returns #N/A
' when no numeric value is found.
' NOTE(review): empty cells read back as Empty, which passes IsNumeric and
' is averaged in as 0 — confirm that blanks-as-zero is intended.
Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
Dim i As Long
Dim sumVals As Double
Dim countVals As Long
sumVals = 0
countVals = 0
If IsArray(colIndexes) Then
For i = LBound(colIndexes) To UBound(colIndexes)
Dim v As Variant
v = ws.Cells(rowIndex, CLng(colIndexes(i))).Value
' Skip error cells (#N/A etc.) and anything non-numeric.
If Not IsError(v) Then
If IsNumeric(v) Then
sumVals = sumVals + CDbl(v)
countVals = countVals + 1
End If
End If
Next i
End If
If countVals = 0 Then
MeanOfRow = CVErr(xlErrNA)
Else
MeanOfRow = sumVals / countVals
End If
End Function

' Returns the column number of headerName in row 1, writing the header into
' the first unused column when it does not exist yet.
Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
Dim c As Long
c = GetColIndex(headerName, ws)
If c = 0 Then
Dim lastCol As Long
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
c = lastCol + 1
ws.Cells(1, c).Value = headerName
End If
EnsureOutputColumn = c
End Function

' Writes per-row means of the 28 belief-statement items (and the easy/hard
' 14-item subsets) into the bs_28 / bs_easy / bs_hard columns of sheet 1.
' Data rows start at row 2; row 1 holds the headers.
Sub BS_Means()
Dim ws As Worksheet
Set ws = ThisWorkbook.Sheets(1)
Dim all28 As Variant
all28 = Array( _
"lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
"spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
)
Dim easy14 As Variant
easy14 = Array( _
"lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _
)
Dim hard14 As Variant
hard14 = Array( _
"spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
)
' Resolve which item columns actually exist on this sheet.
Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
colsAll = BuildPresentColArray(all28, ws)
colsEasy = BuildPresentColArray(easy14, ws)
colsHard = BuildPresentColArray(hard14, ws)
' Create (or reuse) the three output columns.
Dim colBS28 As Long, colBSEasy As Long, colBSHard As Long
colBS28 = EnsureOutputColumn(ws, "bs_28")
colBSEasy = EnsureOutputColumn(ws, "bs_easy")
colBSHard = EnsureOutputColumn(ws, "bs_hard")
' Last data row is taken from column A.
Dim lastRow As Long
lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
Dim r As Long
For r = 2 To lastRow
ws.Cells(r, colBS28).Value = MeanOfRow(ws, r, colsAll)
ws.Cells(r, colBSEasy).Value = MeanOfRow(ws, r, colsEasy)
ws.Cells(r, colBSHard).Value = MeanOfRow(ws, r, colsHard)
Next r
End Sub

View File

@ -0,0 +1,118 @@
Option Explicit
' NOTE(review): this module is a byte-for-byte duplicate of the BS_Means
' module committed earlier in this changeset — consider keeping one copy.

' Returns the 1-based column number whose row-1 header equals headerName,
' or 0 when the header is not present on ws.
Private Function GetColIndex(ByVal headerName As String, ByVal ws As Worksheet) As Long
Dim lastCol As Long
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
Dim m As Variant
' Application.Match returns an error Variant (not a runtime error) on no match.
m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
If IsError(m) Then
GetColIndex = 0
Else
GetColIndex = CLng(m)
End If
End Function

' Maps header names to their column numbers on ws, keeping only the headers
' that exist. Returns a 0-based Long array, or an empty Variant array when
' none of the headers are found.
' NOTE(review): assumes headers is a non-empty 0-based array (as built by
' Array(...)); an empty input would make the first ReDim fail.
Private Function BuildPresentColArray(ByVal headers As Variant, ByVal ws As Worksheet) As Variant
Dim tmp() As Long
ReDim tmp(0 To UBound(headers))
Dim i As Long, c As Long
c = 0
For i = LBound(headers) To UBound(headers)
Dim colIdx As Long
colIdx = GetColIndex(CStr(headers(i)), ws)
If colIdx > 0 Then
tmp(c) = colIdx
c = c + 1
End If
Next i
If c = 0 Then
BuildPresentColArray = Array()
Else
' Shrink to the number of matches actually found.
Dim outArr() As Long
ReDim outArr(0 To c - 1)
For i = 0 To c - 1
outArr(i) = tmp(i)
Next i
BuildPresentColArray = outArr
End If
End Function

' Mean of the numeric cells in row rowIndex across colIndexes; returns #N/A
' when no numeric value is found.
' NOTE(review): empty cells read back as Empty, which passes IsNumeric and
' is averaged in as 0 — confirm that blanks-as-zero is intended.
Private Function MeanOfRow(ByVal ws As Worksheet, ByVal rowIndex As Long, ByVal colIndexes As Variant) As Variant
Dim i As Long
Dim sumVals As Double
Dim countVals As Long
sumVals = 0
countVals = 0
If IsArray(colIndexes) Then
For i = LBound(colIndexes) To UBound(colIndexes)
Dim v As Variant
v = ws.Cells(rowIndex, CLng(colIndexes(i))).Value
' Skip error cells (#N/A etc.) and anything non-numeric.
If Not IsError(v) Then
If IsNumeric(v) Then
sumVals = sumVals + CDbl(v)
countVals = countVals + 1
End If
End If
Next i
End If
If countVals = 0 Then
MeanOfRow = CVErr(xlErrNA)
Else
MeanOfRow = sumVals / countVals
End If
End Function

' Returns the column number of headerName in row 1, writing the header into
' the first unused column when it does not exist yet.
Private Function EnsureOutputColumn(ByVal ws As Worksheet, ByVal headerName As String) As Long
Dim c As Long
c = GetColIndex(headerName, ws)
If c = 0 Then
Dim lastCol As Long
lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
c = lastCol + 1
ws.Cells(1, c).Value = headerName
End If
EnsureOutputColumn = c
End Function

' Writes per-row means of the 28 belief-statement items (and the easy/hard
' 14-item subsets) into the bs_28 / bs_easy / bs_hard columns of sheet 1.
' Data rows start at row 2; row 1 holds the headers.
Sub BS_Means()
Dim ws As Worksheet
Set ws = ThisWorkbook.Sheets(1)
Dim all28 As Variant
all28 = Array( _
"lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", _
"spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
)
Dim easy14 As Variant
easy14 = Array( _
"lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy" _
)
Dim hard14 As Variant
hard14 = Array( _
"spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard" _
)
' Resolve which item columns actually exist on this sheet.
Dim colsAll As Variant, colsEasy As Variant, colsHard As Variant
colsAll = BuildPresentColArray(all28, ws)
colsEasy = BuildPresentColArray(easy14, ws)
colsHard = BuildPresentColArray(hard14, ws)
' Create (or reuse) the three output columns.
Dim colBS28 As Long, colBSEasy As Long, colBSHard As Long
colBS28 = EnsureOutputColumn(ws, "bs_28")
colBSEasy = EnsureOutputColumn(ws, "bs_easy")
colBSHard = EnsureOutputColumn(ws, "bs_hard")
' Last data row is taken from column A.
Dim lastRow As Long
lastRow = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
Dim r As Long
For r = 2 To lastRow
ws.Cells(r, colBS28).Value = MeanOfRow(ws, r, colsAll)
ws.Cells(r, colBSEasy).Value = MeanOfRow(ws, r, colsEasy)
ws.Cells(r, colBSHard).Value = MeanOfRow(ws, r, colsHard)
Next r
End Sub

View File

@ -0,0 +1,25 @@
# Adds NPast_mean_total / NFut_mean_total (row-wise means of the four domain
# means) to exp1.csv and prints quick summaries for verification.
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

past_cols <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")
fut_cols <- c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Row-wise averages; na.rm = TRUE so a participant with a missing domain
# still gets a total over the domains that are present.
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, past_cols], na.rm = TRUE)
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, fut_cols], na.rm = TRUE)

# Save the updated data.
# NOTE(review): this overwrites the input file in place — keep a backup of exp1.csv.
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals. print() is needed so the
# summaries also appear when this file is source()d, not only when run
# line-by-line or via Rscript auto-printing.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first few rows to verify calculations. head() is safe when the
# file has fewer than 5 rows, unlike exp1_data[1:5, ].
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c(past_cols, "NPast_mean_total", fut_cols, "NFut_mean_total")], 5))

View File

@ -0,0 +1,25 @@
# Adds NPast_mean_total / NFut_mean_total (row-wise means of the four domain
# means) to exp1.csv and prints quick summaries for verification.
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

past_cols <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")
fut_cols <- c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Row-wise averages; na.rm = TRUE so a participant with a missing domain
# still gets a total over the domains that are present.
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, past_cols], na.rm = TRUE)
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, fut_cols], na.rm = TRUE)

# Save the updated data.
# NOTE(review): this overwrites the input file in place — keep a backup of exp1.csv.
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals. print() is needed so the
# summaries also appear when this file is source()d, not only when run
# line-by-line or via Rscript auto-printing.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first few rows to verify calculations. head() is safe when the
# file has fewer than 5 rows, unlike exp1_data[1:5, ].
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c(past_cols, "NPast_mean_total", fut_cols, "NFut_mean_total")], 5))

View File

@ -0,0 +1,25 @@
# Adds NPast_mean_total / NFut_mean_total (row-wise means of the four domain
# means) to exp1.csv and prints quick summaries for verification.
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

past_cols <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")
fut_cols <- c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Row-wise averages; na.rm = TRUE so a participant with a missing domain
# still gets a total over the domains that are present.
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, past_cols], na.rm = TRUE)
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, fut_cols], na.rm = TRUE)

# Save the updated data.
# NOTE(review): this overwrites the input file in place — keep a backup of exp1.csv.
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals. print() is needed so the
# summaries also appear when this file is source()d, not only when run
# line-by-line or via Rscript auto-printing.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first few rows to verify calculations. head() is safe when the
# file has fewer than 5 rows, unlike exp1_data[1:5, ].
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c(past_cols, "NPast_mean_total", fut_cols, "NFut_mean_total")], 5))

View File

@ -0,0 +1,25 @@
# Adds NPast_mean_total / NFut_mean_total (row-wise means of the four domain
# means) to exp1.csv and prints quick summaries for verification.
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

# Load the data
exp1_data <- read.csv("exp1.csv")

past_cols <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life")
fut_cols <- c("NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")

# Row-wise averages; na.rm = TRUE so a participant with a missing domain
# still gets a total over the domains that are present.
exp1_data$NPast_mean_total <- rowMeans(exp1_data[, past_cols], na.rm = TRUE)
exp1_data$NFut_mean_total <- rowMeans(exp1_data[, fut_cols], na.rm = TRUE)

# Save the updated data.
# NOTE(review): this overwrites the input file in place — keep a backup of exp1.csv.
write.csv(exp1_data, "exp1.csv", row.names = FALSE)

# Display summary of the calculated totals. print() is needed so the
# summaries also appear when this file is source()d, not only when run
# line-by-line or via Rscript auto-printing.
cat("NPast_mean_total summary:\n")
print(summary(exp1_data$NPast_mean_total))
cat("\nNFut_mean_total summary:\n")
print(summary(exp1_data$NFut_mean_total))

# Show the first few rows to verify calculations. head() is safe when the
# file has fewer than 5 rows, unlike exp1_data[1:5, ].
cat("\nFirst 5 rows of calculated totals:\n")
print(head(exp1_data[, c(past_cols, "NPast_mean_total", fut_cols, "NFut_mean_total")], 5))

View File

@ -0,0 +1,82 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
# Reshape wide (one column per time x domain variable) into long format:
# one row per participant x TIME x DOMAIN, with the score in MEAN_DIFFERENCE.
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# long_data_clean is a plain copy of long_data (NAs were already removed by
# the filter above); kept under a separate name so later chunks can refer to
# the "clean" dataset explicitly.
long_data_clean <- long_data
```

View File

@ -0,0 +1,121 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```

View File

@ -0,0 +1,175 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
# Per TIME x DOMAIN cell counts of missing MEAN_DIFFERENCE values.
# NOTE(review): long_data was already filtered with filter(!is.na(MEAN_DIFFERENCE))
# in the data-reshaping chunk, so n_missing / pct_missing below are always 0.
# To report real missingness, run this on the data before that filter.
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```

View File

@ -0,0 +1,205 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
# Per TIME x DOMAIN cell counts of missing MEAN_DIFFERENCE values.
# NOTE(review): long_data was already filtered with filter(!is.na(MEAN_DIFFERENCE))
# in the data-reshaping chunk, so n_missing / pct_missing below are always 0.
# To report real missingness, run this on the data before that filter.
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
# Levene's test for equal variances across TIME, run separately per DOMAIN.
# NOTE(review): leveneTest() is given a formula but no data= argument inside
# summarise(); this relies on the formula resolving MEAN_DIFFERENCE and TIME
# through dplyr's data mask, and the test is computed twice per group (once
# for F, once for p). Verify this runs; a safer form is
# leveneTest(MEAN_DIFFERENCE ~ TIME, data = pick(everything())).
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
# Levene's test for equal variances across DOMAIN, run separately per TIME.
# NOTE(review): same caveat as the TIME chunk — leveneTest() has no data=
# argument inside summarise() and is evaluated twice per group; verify it
# resolves MEAN_DIFFERENCE / DOMAIN correctly in this context.
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
```

View File

@ -0,0 +1,310 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
# Global chunk defaults for the rendered report: hide code, suppress
# warnings and package startup messages.
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)       # ezANOVA for the mixed design
library(car)      # leveneTest
library(nortest)  # For normality tests (ad.test)
library(emmeans)  # For post-hoc comparisons
library(purrr)    # For map functions (also attached by tidyverse)
library(effsize)  # For Cohen's d calculations
library(ggplot2)  # For plotting (also attached by tidyverse)
options(scipen = 999)                              # suppress scientific notation in output
options(contrasts = c("contr.sum", "contr.poly"))  # sum-to-zero contrasts, needed for Type III SS
# Fix: setwd() inside a knitr chunk only applies to that single chunk —
# knitr restores the working directory afterwards, so read.csv("exp1.csv")
# in the next chunk would not see this directory when knitting. Setting the
# knitr root.dir makes the path apply to all subsequent chunks; setwd() is
# kept for interactive (non-knit) use.
knitr::opts_knit$set(root.dir = "C:/Users/irina/Documents/DND/EOHI/eohi1")
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
# NOTE(review): relative path — relies on the working directory set in the
# libraries chunk pointing at the project folder; confirm when knitting.
data <- read.csv("exp1.csv")
# The eight repeated-measures columns: 2 time frames (NPast/NFut) x 4 domains.
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
# Lookup table translating each wide-format column name into its TIME level
# (Past/Future) and DOMAIN level; joined onto the pivoted data below.
domain_mapping <- data.frame(
  variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
               "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
  time = c(rep("Past", 4), rep("Future", 4)),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)
```
```{r data-reshaping}
# Reshape wide -> long: one row per participant x TIME x DOMAIN, with the
# score in MEAN_DIFFERENCE. Factor levels are set explicitly so Past
# precedes Future and the four domains keep a fixed order in models/plots.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  left_join(domain_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
# NOTE(review): long_data_clean is currently a plain alias of long_data —
# no additional cleaning (e.g. outlier removal) is applied here; confirm
# that is intentional.
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
# Nine descriptive statistics for every TIME x DOMAIN cell, each rounded to
# five decimal places. Written with across() so the round/na.rm boilerplate
# appears once per statistic; column names and order match the original.
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    across(
      MEAN_DIFFERENCE,
      list(
        mean     = ~ round(mean(.x, na.rm = TRUE), 5),
        variance = ~ round(var(.x, na.rm = TRUE), 5),
        sd       = ~ round(sd(.x, na.rm = TRUE), 5),
        median   = ~ round(median(.x, na.rm = TRUE), 5),
        q1       = ~ round(quantile(.x, 0.25, na.rm = TRUE), 5),
        q3       = ~ round(quantile(.x, 0.75, na.rm = TRUE), 5),
        min      = ~ round(min(.x, na.rm = TRUE), 5),
        max      = ~ round(max(.x, na.rm = TRUE), 5)
      ),
      .names = "{.fn}"
    ),
    .groups = 'drop'
  )
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
# Same cell summaries as above, additionally split by the between-subjects
# presentation-order factor TEMPORAL_DO.
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    across(
      MEAN_DIFFERENCE,
      list(
        mean     = ~ round(mean(.x, na.rm = TRUE), 5),
        variance = ~ round(var(.x, na.rm = TRUE), 5),
        sd       = ~ round(sd(.x, na.rm = TRUE), 5)
      ),
      .names = "{.fn}"
    ),
    .groups = 'drop'
  )
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Function to calculate Hartley's F-max ratio (largest / smallest variance).
# Kept for compatibility; not called by the chunks shown in this document.
calculate_hartley_ratio <- function(variances) {
  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}
# Bootstrap critical value for Hartley's F-max test.
#
# Args:
#   data         - data.frame/tibble, one row per observation
#   group_var    - name (string) of the grouping column
#   response_var - name (string) of the numeric response column
#   n_iter       - number of bootstrap iterations (default 1000)
#
# Returns a list: observed_ratio (F-max on the data), critical_95 (95th
# percentile of the bootstrap ratio distribution), n_valid_iterations
# (number of finite bootstrap ratios kept).
#
# Fix: the original extracted group values with data[rows, response_var],
# which yields a one-column tibble — not a vector — when `data` is a tibble,
# as it is when called via pick() in the results chunk; sample() then
# resamples the single column instead of the observations. Extraction now
# uses data[[response_var]], which always returns a vector. The dplyr/rlang
# tidy-eval summarise was also replaced with base R, removing the
# dependency without changing the result.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  groups <- unique(data[[group_var]])
  response <- data[[response_var]]
  # Observed per-group variances
  observed_vars <- vapply(
    groups,
    function(g) var(response[data[[group_var]] == g], na.rm = TRUE),
    numeric(1)
  )
  # Replace zero/NA variances with a tiny positive value so ratios stay finite
  bad <- observed_vars <= 0 | is.na(observed_vars)
  if (any(bad)) observed_vars[bad] <- 1e-10
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Extract each group's non-missing values once, outside the bootstrap loop
  group_data_list <- lapply(groups, function(g) {
    v <- response[data[[group_var]] == g]
    v[!is.na(v)]
  })
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    # Resample each group independently, then recompute the variance ratio
    sample_vars <- vapply(
      group_data_list,
      function(v) var(sample(v, size = length(v), replace = TRUE), na.rm = TRUE),
      numeric(1)
    )
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Drop non-finite ratios (e.g. a resample with zero variance)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value: 95th percentile of the bootstrap ratio distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
```
```{r hartley-results}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
# Sanity-print the factor levels first: the variance columns below hardcode
# the level labels "01PAST" and "02FUT" — NOTE(review): if the levels in the
# CSV ever differ, the subsets are empty and var() silently returns NA.
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
# pick() hands the bootstrap function only the two columns it needs, as a
# tibble, for each TIME × DOMAIN group.
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    # Flag cells whose observed ratio exceeds the bootstrap 95% critical value
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```

View File

@ -0,0 +1,348 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Function to calculate Hartley's F-max ratio
calculate_hartley_ratio <- function(variances) {
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}
# More efficient bootstrap function for Hartley's F-max test
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
# Get unique groups and their sample sizes
groups <- unique(data[[group_var]])
# Calculate observed variances for each group
observed_vars <- data %>%
dplyr::group_by(!!rlang::sym(group_var)) %>%
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
dplyr::pull(var)
# Handle invalid variances
if(any(observed_vars <= 0 | is.na(observed_vars))) {
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
}
# Calculate observed F-max ratio
observed_ratio <- max(observed_vars) / min(observed_vars)
# Pre-allocate storage for bootstrap ratios
bootstrap_ratios <- numeric(n_iter)
# Get group data once
group_data_list <- map(groups, ~ {
group_data <- data[data[[group_var]] == .x, response_var]
group_data[!is.na(group_data)]
})
# Bootstrap with pre-allocated storage
for(i in 1:n_iter) {
# Bootstrap sample from each group independently
sample_vars <- map_dbl(group_data_list, ~ {
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
var(bootstrap_sample, na.rm = TRUE)
})
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
}
# Remove invalid ratios
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
if(length(valid_ratios) == 0) {
stop("No valid bootstrap ratios generated")
}
# Calculate critical value (95th percentile)
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
# Return only essential information
return(list(
observed_ratio = observed_ratio,
critical_95 = critical_95,
n_valid_iterations = length(valid_ratios)
))
}
```
```{r hartley-results}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```
# Mixed ANOVA Analysis
## Design Balance Check
```{r design-check}
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if the repeated-measures design is balanced: every participant must
# contribute exactly one observation to every TIME x DOMAIN cell.
# Fix: the original condition accepted cell counts of 0 as "balanced", but a
# 0 means a participant is MISSING a combination — which is precisely the
# unbalanced case; only counts of exactly 1 qualify.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance == 1)) {
  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
  print("Warning: Design is unbalanced")
  print(summary(as.vector(design_balance)))
}
```
## Mixed ANOVA with Sphericity Corrections
```{r mixed-anova}
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 requests Type III sums of squares; this relies on the
# sum-to-zero contrasts set via options(contrasts = ...) in the libraries
# chunk. detailed = TRUE adds SSn/SSd columns to the output table.
mixed_anova_model <- ezANOVA(data = long_data_clean,
                              dv = MEAN_DIFFERENCE,
                              wid = pID,
                              between = TEMPORAL_DO,
                              within = .(TIME, DOMAIN),
                              type = 3,
                              detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
```

View File

@ -0,0 +1,430 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Function to calculate Hartley's F-max ratio
calculate_hartley_ratio <- function(variances) {
max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}
# More efficient bootstrap function for Hartley's F-max test
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
# Get unique groups and their sample sizes
groups <- unique(data[[group_var]])
# Calculate observed variances for each group
observed_vars <- data %>%
dplyr::group_by(!!rlang::sym(group_var)) %>%
dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
dplyr::pull(var)
# Handle invalid variances
if(any(observed_vars <= 0 | is.na(observed_vars))) {
observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
}
# Calculate observed F-max ratio
observed_ratio <- max(observed_vars) / min(observed_vars)
# Pre-allocate storage for bootstrap ratios
bootstrap_ratios <- numeric(n_iter)
# Get group data once
group_data_list <- map(groups, ~ {
group_data <- data[data[[group_var]] == .x, response_var]
group_data[!is.na(group_data)]
})
# Bootstrap with pre-allocated storage
for(i in 1:n_iter) {
# Bootstrap sample from each group independently
sample_vars <- map_dbl(group_data_list, ~ {
bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
var(bootstrap_sample, na.rm = TRUE)
})
bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
}
# Remove invalid ratios
valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
if(length(valid_ratios) == 0) {
stop("No valid bootstrap ratios generated")
}
# Calculate critical value (95th percentile)
critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
# Return only essential information
return(list(
observed_ratio = observed_ratio,
critical_95 = critical_95,
n_valid_iterations = length(valid_ratios)
))
}
```
```{r hartley-results}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```
# Mixed ANOVA Analysis
## Design Balance Check
```{r design-check}
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
```
## Mixed ANOVA with Sphericity Corrections
```{r mixed-anova}
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
```
## Mauchly's Test for Sphericity
```{r mauchly-test}
# Mauchly's sphericity test component returned by ezANOVA for the
# within-subjects effects fitted above.
print(mixed_anova_model$Mauchly)
```
## Sphericity-Corrected Results
```{r sphericity-corrections}
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
# for the within-subjects effects, plus plain-text effect summaries.
# Fixes: (1) sprintf("%d", x) is a runtime error in R when x is a double,
# and ezANOVA returns DFn/DFd as numeric — the whole-number dfs are now
# coerced with as.integer(); (2) print("\nNote...") rendered the "\n"
# literally — replaced with cat(); (3) repeated match() lookups hoisted.
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  # Row positions of the corrected effects in the ANOVA table (computed once)
  eff_idx <- match(sphericity_corr$Effect, anova_table$Effect)
  # Original and epsilon-scaled degrees of freedom for each corrected effect
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[eff_idx],
    Original_DFd = anova_table$DFd[eff_idx],
    GG_DFn = anova_table$DFn[eff_idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[eff_idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[eff_idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[eff_idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Helper: report one uncorrected effect line in APA-ish format.
  report_effect <- function(effect, suffix = "") {
    if(effect %in% anova_table$Effect) {
      row <- anova_table[anova_table$Effect == effect, ]
      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f%s\n",
                  effect, as.integer(row$DFn), as.integer(row$DFd),
                  row$F, row$p, suffix))
    }
  }
  # Between-subjects effects (no sphericity corrections needed)
  report_effect("TEMPORAL_DO")
  # Within-subjects main effects
  # TIME has 2 levels, so sphericity holds trivially
  report_effect("TIME", " (2 levels, sphericity satisfied)")
  report_effect("DOMAIN")
  # Effects with sphericity corrections: original, GG- and HF-corrected dfs
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[eff_idx[i]]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
                as.integer(corrected_df$Original_DFn[i]),
                as.integer(corrected_df$Original_DFd[i]), f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
```

View File

@ -0,0 +1,497 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Hartley's F-max statistic: ratio of the largest group variance to the
# smallest (NA variances are ignored via na.rm).
calculate_hartley_ratio <- function(variances) {
  var_range <- range(variances, na.rm = TRUE)  # c(min, max)
  var_range[2] / var_range[1]
}
# Bootstrap critical values for Hartley's F-max homogeneity-of-variance test.
#
# Arguments:
#   data         - data.frame/tibble with the grouping and response columns
#   group_var    - column name (string) of the grouping factor
#   response_var - column name (string) of the numeric response
#   n_iter       - number of bootstrap resamples (default 1000)
#
# Returns a list:
#   observed_ratio     - observed max/min group-variance ratio
#   critical_95        - bootstrap 95th percentile of the ratio
#   n_valid_iterations - number of finite bootstrap ratios retained
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group levels
  groups <- unique(data[[group_var]])
  # Observed per-group variances
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)
  # Guard against zero/NA variances so the ratio below stays finite
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }
  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)
  # Extract each group's response values once, as plain vectors.
  # BUG FIX: the original used data[rows, response_var], which on a tibble
  # (the call sites pass pick(...), which yields a tibble) returns a
  # one-column tibble rather than an atomic vector, breaking sample()/var()
  # below. [[ ]] extraction yields a vector for data.frames and tibbles alike.
  group_data_list <- map(groups, ~ {
    group_data <- data[[response_var]][data[[group_var]] == .x]
    group_data[!is.na(group_data)]
  })
  # Bootstrap: resample each group independently with replacement
  for(i in 1:n_iter) {
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Drop non-finite ratios (e.g. a zero resampled variance in a tiny group)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value = 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
```
```{r hartley-results}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```
# Mixed ANOVA Analysis
## Design Balance Check
```{r design-check}
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
```
## Mixed ANOVA with Sphericity Corrections
```{r mixed-anova}
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
```
## Mauchly's Test for Sphericity
```{r mauchly-test}
print(mixed_anova_model$Mauchly)
```
## Sphericity-Corrected Results
```{r sphericity-corrections}
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
# Report uncorrected and sphericity-corrected (Greenhouse-Geisser, Huynh-Feldt)
# results. ezANOVA only returns `Sphericity Corrections` when a within-subjects
# effect has more than 2 levels, hence the NULL guard.
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  # Rows of the ANOVA table matching each corrected effect, computed once
  # instead of repeating match() for every column below.
  eff_rows <- match(sphericity_corr$Effect, anova_table$Effect)
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[eff_rows],
    Original_DFd = anova_table$DFd[eff_rows],
    # Corrected dfs = original dfs scaled by the epsilon estimates
    GG_DFn = anova_table$DFn[eff_rows] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[eff_rows] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[eff_rows] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[eff_rows] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]
      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }
  # Within-subjects effects (sphericity corrections where applicable)
  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }
  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }
  # Effects in the correction table: uncorrected, GG- and HF-corrected F/p.
  # Row i of corrected_df is built from row i of sphericity_corr, so the
  # `p[GG]`/`p[HF]` columns may safely share the same index i.
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
      corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
      corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
      corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # BUG FIX: cat(), not print() — print() renders the string literally,
  # including the quotes and the "\n" escape, instead of emitting a newline.
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
```
# Effect Sizes (Cohen's d)
## Main Effects
```{r cohens-d-main}
# Create aov model for emmeans
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
data = long_data_clean)
# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
cat("\nCohen's d for TIME main effect:\n")
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
time_cohens_d <- cohen.d(time_past_data, time_future_data)
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
```
```{r cohens-d-domain}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
cat("\nCohen's d for significant DOMAIN contrasts:\n")
for(i in seq_len(nrow(significant_domain))) {
contrast_name <- as.character(significant_domain$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
if(length(data1) > 0 && length(data2) > 0) {
domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name))
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
cat("\n")
}
}
}
}
```

View File

@ -0,0 +1,573 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Hartley's F-max statistic: ratio of the largest group variance to the
# smallest (NA variances are ignored via na.rm).
calculate_hartley_ratio <- function(variances) {
  var_range <- range(variances, na.rm = TRUE)  # c(min, max)
  var_range[2] / var_range[1]
}
# Bootstrap critical values for Hartley's F-max homogeneity-of-variance test.
#
# Arguments:
#   data         - data.frame/tibble with the grouping and response columns
#   group_var    - column name (string) of the grouping factor
#   response_var - column name (string) of the numeric response
#   n_iter       - number of bootstrap resamples (default 1000)
#
# Returns a list:
#   observed_ratio     - observed max/min group-variance ratio
#   critical_95        - bootstrap 95th percentile of the ratio
#   n_valid_iterations - number of finite bootstrap ratios retained
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Unique group levels
  groups <- unique(data[[group_var]])
  # Observed per-group variances
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)
  # Guard against zero/NA variances so the ratio below stays finite
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }
  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)
  # Extract each group's response values once, as plain vectors.
  # BUG FIX: the original used data[rows, response_var], which on a tibble
  # (the call sites pass pick(...), which yields a tibble) returns a
  # one-column tibble rather than an atomic vector, breaking sample()/var()
  # below. [[ ]] extraction yields a vector for data.frames and tibbles alike.
  group_data_list <- map(groups, ~ {
    group_data <- data[[response_var]][data[[group_var]] == .x]
    group_data[!is.na(group_data)]
  })
  # Bootstrap: resample each group independently with replacement
  for(i in 1:n_iter) {
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Drop non-finite ratios (e.g. a zero resampled variance in a tiny group)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value = 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
```
```{r hartley-results}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```
# Mixed ANOVA Analysis
## Design Balance Check
```{r design-check}
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
```
## Mixed ANOVA with Sphericity Corrections
```{r mixed-anova}
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
```
## Mauchly's Test for Sphericity
```{r mauchly-test}
print(mixed_anova_model$Mauchly)
```
## Sphericity-Corrected Results
```{r sphericity-corrections}
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
# Report uncorrected and sphericity-corrected (Greenhouse-Geisser, Huynh-Feldt)
# results. ezANOVA only returns `Sphericity Corrections` when a within-subjects
# effect has more than 2 levels, hence the NULL guard.
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  # Rows of the ANOVA table matching each corrected effect, computed once
  # instead of repeating match() for every column below.
  eff_rows <- match(sphericity_corr$Effect, anova_table$Effect)
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[eff_rows],
    Original_DFd = anova_table$DFd[eff_rows],
    # Corrected dfs = original dfs scaled by the epsilon estimates
    GG_DFn = anova_table$DFn[eff_rows] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[eff_rows] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[eff_rows] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[eff_rows] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]
      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }
  # Within-subjects effects (sphericity corrections where applicable)
  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }
  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }
  # Effects in the correction table: uncorrected, GG- and HF-corrected F/p.
  # Row i of corrected_df is built from row i of sphericity_corr, so the
  # `p[GG]`/`p[HF]` columns may safely share the same index i.
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
      corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
      corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
      corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # BUG FIX: cat(), not print() — print() renders the string literally,
  # including the quotes and the "\n" escape, instead of emitting a newline.
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
```
# Effect Sizes (Cohen's d)
## Main Effects
```{r cohens-d-main}
# Create aov model for emmeans
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
data = long_data_clean)
# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
cat("\nCohen's d for TIME main effect:\n")
time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
time_cohens_d <- cohen.d(time_past_data, time_future_data)
cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
```
```{r cohens-d-domain}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
cat("\nCohen's d for significant DOMAIN contrasts:\n")
for(i in seq_len(nrow(significant_domain))) {
contrast_name <- as.character(significant_domain$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
if(length(data1) > 0 && length(data2) > 0) {
domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name))
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
cat("\n")
}
}
}
}
```
## Two-Way Interactions
```{r cohens-d-function}
# Function to calculate Cohen's d for pairwise comparisons
#
# For each row of `pairs_df` (an emmeans pairs() result coerced to a data
# frame) with p < 0.05, recovers the two factor levels from the "A - B"
# contrast label, pulls the matching raw observations from `data`, and
# prints Cohen's d (via effsize::cohen.d) alongside the adjusted p-value.
#
# Arguments:
#   pairs_df     - data.frame with columns `contrast` and `p.value`; if it
#                  also has a column named `group2_var`, comparisons are
#                  treated as simple effects within that moderator level
#   data         - raw long-format data frame holding the factor and
#                  response columns
#   group1_var   - name (string) of the factor whose levels appear in the
#                  contrast labels
#   group2_var   - name (string) of the optional moderator factor
#   response_var - name (string) of the numeric response column
#
# Side effects only (printing); returns nothing useful.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
# Keep only comparisons significant at the conventional 0.05 level
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast label "level1 - level2" into its two levels.
# NOTE(review): assumes factor levels themselves never contain " - ";
# a level with that substring would split incorrectly — confirm.
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions
if(group2_var %in% colnames(comparison)) {
# Simple-effects case: restrict both samples to the moderator level
# this comparison was computed within
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
# Main-effect case: compare across all observations of each level
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
# Skip silently when a parsed level matches no rows (e.g. label mismatch)
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
```
```{r interaction-effects}
# Note: These sections would need the actual simple effects results from your analysis
# The original script references undefined variables: temporal_time_simple and time_domain_simple
# These would need to be calculated using emmeans for simple effects
# 1. TEMPORAL_DO × TIME INTERACTION
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# 2. TIME × DOMAIN INTERACTION
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
```

View File

@ -0,0 +1,660 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Hartley's F-max statistic: ratio of the largest to the smallest group
# variance. NAs in the input are ignored.
calculate_hartley_ratio <- function(variances) {
  v <- range(variances, na.rm = TRUE)
  v[2] / v[1]
}
# More efficient bootstrap function for Hartley's F-max test
#
# Estimates a bootstrap critical value for Hartley's F-max statistic
# (largest group variance / smallest group variance).
#
# Args:
#   data:         data.frame or tibble holding the grouping and response columns
#   group_var:    column name (string) of the grouping factor
#   response_var: column name (string) of the numeric response
#   n_iter:       number of bootstrap iterations (default 1000)
#
# Returns a list:
#   observed_ratio     - F-max ratio of the observed group variances
#   critical_95        - 95th percentile of the bootstrap F-max distribution
#   n_valid_iterations - number of finite bootstrap ratios retained
#
# Note: no seed is set here; callers should set.seed() for reproducibility
# (the hartley-results chunk does).
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Split the response into per-group vectors, dropping NAs once up front.
  # Column extraction uses [[ ]] (not data[rows, col]) so the function behaves
  # identically for plain data.frames and tibbles: tibble's data[rows, col]
  # returns a one-column tibble, which previously leaked into sample()/var().
  group_data_list <- split(data[[response_var]], data[[group_var]])
  group_data_list <- lapply(group_data_list, function(v) v[!is.na(v)])
  # Observed per-group variances; clamp zero/NA variances to a tiny positive
  # value so the ratio below stays finite
  observed_vars <- vapply(group_data_list, var, numeric(1))
  bad <- observed_vars <= 0 | is.na(observed_vars)
  if (any(bad)) observed_vars[bad] <- 1e-10
  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Bootstrap: resample each group independently (with replacement) and
  # recompute the F-max ratio; storage is pre-allocated
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    sample_vars <- vapply(group_data_list, function(v) {
      var(sample(v, size = length(v), replace = TRUE), na.rm = TRUE)
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Keep only finite, non-missing ratios (a degenerate resample can yield
  # zero variance in the denominator)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
```
```{r hartley-results}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
# Sanity check: show the TEMPORAL_DO levels and cell counts actually present
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
# NOTE(review): the level labels "01PAST" and "02FUT" are hard-coded below;
# if the CSV ever uses different TEMPORAL_DO codes these variances become NA
# -- confirm against the levels printed above.
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility
# pick() hands each TIME × DOMAIN sub-table (as a tibble) to the bootstrap helper
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
# Unpack the per-cell result lists into plain numeric columns
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
# significant = TRUE: observed variance ratio exceeds the bootstrap 95% cutoff
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```
# Mixed ANOVA Analysis
## Design Balance Check
```{r design-check}
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
```
## Mixed ANOVA with Sphericity Corrections
```{r mixed-anova}
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
```
## Mauchly's Test for Sphericity
```{r mauchly-test}
print(mixed_anova_model$Mauchly)
```
## Sphericity-Corrected Results
```{r sphericity-corrections}
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  # Hoist the row lookup: one match() call instead of six identical ones
  corr_idx <- match(sphericity_corr$Effect, anova_table$Effect)
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[corr_idx],
    Original_DFd = anova_table$DFd[corr_idx],
    # Corrected df = original df scaled by the corresponding epsilon estimate
    GG_DFn = anova_table$DFn[corr_idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[corr_idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[corr_idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[corr_idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]
      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }
  # Within-subjects effects (sphericity corrections where applicable)
  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }
  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }
  # Interactions: report original plus GG- and HF-corrected df and p-values
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf(" Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf(" GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf(" HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # cat() (not print()) so the leading newline is actually emitted instead of
  # appearing as a literal "\n" inside print()'s quoted representation
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
```
# Effect Sizes (Cohen's d)
## Main Effects
```{r cohens-d-main}
# Create aov model for emmeans (same design as the ezANOVA call above:
# TEMPORAL_DO between subjects, TIME and DOMAIN within subjects)
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)
# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
# Lowercase "bonferroni" for consistency with emmeans' adjustment names and
# the other chunks in this document; with only 2 TIME levels there is a single
# contrast, so the adjustment has no numerical effect here.
time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Calculate Cohen's d for the TIME main effect from the raw (pooled) data
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
```
```{r cohens-d-domain}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
cat("\nCohen's d for significant DOMAIN contrasts:\n")
for(i in seq_len(nrow(significant_domain))) {
contrast_name <- as.character(significant_domain$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
if(length(data1) > 0 && length(data2) > 0) {
domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name))
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
cat("\n")
}
}
}
}
```
## Two-Way Interactions
```{r cohens-d-function}
# Function to calculate Cohen's d for pairwise comparisons
#
# pairs_df:     data.frame of emmeans pairwise contrasts; must have columns
#               'contrast' and 'p.value', and may carry a conditioning column
#               named after group2_var when simple effects are passed in
# data:         raw long-format data used to recompute effect sizes
# group1_var:   name of the factor whose levels appear in 'contrast'
# group2_var:   name of the conditioning factor (ignored when pairs_df has no
#               column of that name)
# response_var: name of the numeric response column
#
# Side effect only: prints Cohen's d (effsize::cohen.d) for every contrast
# with p < 0.05; returns nothing useful.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast
# emmeans writes contrasts as "levelA - levelB"; split on " - ".
# NOTE(review): assumes the level labels themselves never contain " - ";
# contrasts that do not split into exactly two parts are silently skipped.
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions
# If the contrast table is conditioned on group2_var (simple effects),
# restrict both samples to that conditioning level
if(group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
```
```{r interaction-effects}
# Note: These sections would need the actual simple effects results from your analysis
# The original script references undefined variables: temporal_time_simple and time_domain_simple
# These would need to be calculated using emmeans for simple effects
# 1. TEMPORAL_DO × TIME INTERACTION
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# 2. TIME × DOMAIN INTERACTION
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
```
# Interaction Plot
```{r interaction-plot}
# Define color palette for DOMAIN (4 levels)
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")
# Create estimated marginal means for DOMAIN x TIME
emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)
# Prepare emmeans data frame
emmeans_data2 <- emm_full %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels)
)
iPlot <- long_data_clean %>%
dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels)
)
# Plot without TEMPORAL_DO facet
interaction_plot2 <- ggplot() +
geom_point(
data = iPlot,
aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
alpha = 0.3, shape = 16
) +
geom_rect(
data = emmeans_data2,
aes(
xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
ymin = ci_lower, ymax = ci_upper,
fill = DOMAIN
),
color = "black", alpha = 0.5
) +
geom_segment(
data = emmeans_data2,
aes(
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
y = ci_lower, yend = ci_upper
),
color = "black"
) +
geom_point(
data = emmeans_data2,
aes(
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
y = plot_mean,
color = DOMAIN,
shape = DOMAIN
),
size = 2.5, stroke = 0.8, fill = "black"
) +
labs(
x = "TIME", y = "Mean Difference",
title = "DOMAIN × TIME Interaction", subtitle = ""
) +
scale_color_manual(name = "DOMAIN", values = cbp1) +
scale_fill_manual(name = "DOMAIN", values = cbp1) +
scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 0, hjust = 0.5),
plot.title = element_text(size = 14, hjust = 0.5),
plot.subtitle = element_text(size = 12, hjust = 0.5)
)
print(interaction_plot2)
```

View File

@ -0,0 +1,660 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Read the data
data <- read.csv("exp1.csv")
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Define domain mapping
domain_mapping <- data.frame(
variable = c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
"NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life"),
time = c(rep("Past", 4), rep("Future", 4)),
domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
stringsAsFactors = FALSE
)
```
```{r data-reshaping}
long_data <- data %>%
select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
pivot_longer(
cols = all_of(required_vars),
names_to = "variable",
values_to = "MEAN_DIFFERENCE"
) %>%
left_join(domain_mapping, by = "variable") %>%
# Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
mutate(
TIME = factor(time, levels = c("Past", "Future")),
DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
pID = as.factor(pID),
TEMPORAL_DO = as.factor(TEMPORAL_DO)
) %>%
# Select final columns and remove any rows with missing values
select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
desc_stats <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
median = round(median(MEAN_DIFFERENCE, na.rm = TRUE), 5),
q1 = round(quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE), 5),
q3 = round(quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE), 5),
min = round(min(MEAN_DIFFERENCE, na.rm = TRUE), 5),
max = round(max(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
desc_stats_by_temporal <- long_data %>%
group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
summarise(
n = n(),
mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
.groups = 'drop'
)
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
missing_summary <- long_data %>%
group_by(TIME, DOMAIN) %>%
summarise(
n_total = n(),
n_missing = sum(is.na(MEAN_DIFFERENCE)),
pct_missing = round(100 * n_missing / n_total, 2),
.groups = 'drop'
)
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
outlier_summary <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
mean = mean(MEAN_DIFFERENCE),
sd = sd(MEAN_DIFFERENCE),
q1 = quantile(MEAN_DIFFERENCE, 0.25),
median = median(MEAN_DIFFERENCE),
q3 = quantile(MEAN_DIFFERENCE, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 1.5 * iqr,
upper_bound = q3 + 1.5 * iqr,
n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
.groups = 'drop'
)
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
normality_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
n = n(),
ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
.groups = 'drop'
)
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
homogeneity_time <- long_data_clean %>%
group_by(DOMAIN) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
homogeneity_domain <- long_data_clean %>%
group_by(TIME) %>%
summarise(
levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
.groups = 'drop'
)
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Hartley's F-max statistic: ratio of the largest to the smallest group
# variance. NAs in the input are ignored.
calculate_hartley_ratio <- function(variances) {
  v <- range(variances, na.rm = TRUE)
  v[2] / v[1]
}
# More efficient bootstrap function for Hartley's F-max test
#
# Estimates a bootstrap critical value for Hartley's F-max statistic
# (largest group variance / smallest group variance).
#
# Args:
#   data:         data.frame or tibble holding the grouping and response columns
#   group_var:    column name (string) of the grouping factor
#   response_var: column name (string) of the numeric response
#   n_iter:       number of bootstrap iterations (default 1000)
#
# Returns a list:
#   observed_ratio     - F-max ratio of the observed group variances
#   critical_95        - 95th percentile of the bootstrap F-max distribution
#   n_valid_iterations - number of finite bootstrap ratios retained
#
# Note: no seed is set here; callers should set.seed() for reproducibility
# (the hartley-results chunk does).
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Split the response into per-group vectors, dropping NAs once up front.
  # Column extraction uses [[ ]] (not data[rows, col]) so the function behaves
  # identically for plain data.frames and tibbles: tibble's data[rows, col]
  # returns a one-column tibble, which previously leaked into sample()/var().
  group_data_list <- split(data[[response_var]], data[[group_var]])
  group_data_list <- lapply(group_data_list, function(v) v[!is.na(v)])
  # Observed per-group variances; clamp zero/NA variances to a tiny positive
  # value so the ratio below stays finite
  observed_vars <- vapply(group_data_list, var, numeric(1))
  bad <- observed_vars <= 0 | is.na(observed_vars)
  if (any(bad)) observed_vars[bad] <- 1e-10
  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Bootstrap: resample each group independently (with replacement) and
  # recompute the F-max ratio; storage is pre-allocated
  bootstrap_ratios <- numeric(n_iter)
  for (i in seq_len(n_iter)) {
    sample_vars <- vapply(group_data_list, function(v) {
      var(sample(v, size = length(v), replace = TRUE), na.rm = TRUE)
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Keep only finite, non-missing ratios (a degenerate resample can yield
  # zero variance in the denominator)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if (length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
```
```{r hartley-results}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
observed_temporal_ratios <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
# Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
# Calculate F-max ratio
f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
.groups = 'drop'
) %>%
select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
group_by(TIME, DOMAIN) %>%
summarise(
hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
.groups = 'drop'
) %>%
mutate(
observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
significant = observed_ratio > critical_95
) %>%
select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```
# Mixed ANOVA Analysis
## Design Balance Check
```{r design-check}
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if design is balanced
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
print("Warning: Design is unbalanced")
print(summary(as.vector(design_balance)))
}
```
## Mixed ANOVA with Sphericity Corrections
```{r mixed-anova}
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
mixed_anova_model <- ezANOVA(data = long_data_clean,
dv = MEAN_DIFFERENCE,
wid = pID,
between = TEMPORAL_DO,
within = .(TIME, DOMAIN),
type = 3,
detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
```
## Mauchly's Test for Sphericity
```{r mauchly-test}
print(mixed_anova_model$Mauchly)
```
## Sphericity-Corrected Results
```{r sphericity-corrections}
# Greenhouse-Geisser / Huynh-Feldt corrections are reported only when ezANOVA
# produced them (i.e. Mauchly's test was applicable).
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  # Build a table of original and epsilon-corrected degrees of freedom.
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]
      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }
  # TIME main effect: only 2 levels, so sphericity holds trivially.
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }
  # DOMAIN main effect (4 levels; see corrected_df for the adjusted DFs).
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }
  # Effects listed in the corrections table, with GG- and HF-adjusted p-values.
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # BUG FIX: print() on a string renders the escaped literal ("\nNote: ...")
  # with quotes instead of emitting a newline; cat() gives the intended output.
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
```
# Effect Sizes (Cohen's d)
## Main Effects
```{r cohens-d-main}
# aov model with the participant error stratum, used by emmeans below.
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)
# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
# CONSISTENCY FIX: use lowercase "bonferroni" as in every other pairs() call
# in this document (stats::p.adjust.methods are lowercase).
time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Cohen's d for the TIME main effect, computed from the raw observations.
# NOTE(review): this pools repeated measures across participants and domains
# as if they were independent samples — confirm this is the intended variant.
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
```
```{r cohens-d-domain}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
cat("\nCohen's d for significant DOMAIN contrasts:\n")
for(i in seq_len(nrow(significant_domain))) {
contrast_name <- as.character(significant_domain$contrast[i])
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
if(length(data1) > 0 && length(data2) > 0) {
domain_cohens_d <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s\n", contrast_name))
cat(sprintf(" n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", domain_cohens_d$estimate))
cat(sprintf(" Effect size interpretation: %s\n", domain_cohens_d$magnitude))
cat(sprintf(" p-value: %.5f\n", significant_domain$p.value[i]))
cat("\n")
}
}
}
}
```
## Two-Way Interactions
```{r cohens-d-function}
# Function to calculate Cohen's d for pairwise comparisons
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
if(nrow(significant_pairs) > 0) {
cat("Significant pairwise comparisons (p < 0.05):\n")
print(significant_pairs)
cat("\nCohen's d calculated from raw data:\n")
for(i in seq_len(nrow(significant_pairs))) {
comparison <- significant_pairs[i, ]
contrast_name <- as.character(comparison$contrast)
# Parse the contrast
contrast_parts <- strsplit(contrast_name, " - ")[[1]]
if(length(contrast_parts) == 2) {
level1 <- trimws(contrast_parts[1])
level2 <- trimws(contrast_parts[2])
# Get raw data for both conditions
if(group2_var %in% colnames(comparison)) {
group2_level <- as.character(comparison[[group2_var]])
data1 <- data[[response_var]][
data[[group1_var]] == level1 &
data[[group2_var]] == group2_level]
data2 <- data[[response_var]][
data[[group1_var]] == level2 &
data[[group2_var]] == group2_level]
} else {
data1 <- data[[response_var]][data[[group1_var]] == level1]
data2 <- data[[response_var]][data[[group1_var]] == level2]
}
if(length(data1) > 0 && length(data2) > 0) {
# Calculate Cohen's d using effsize package
cohens_d_result <- cohen.d(data1, data2)
cat(sprintf("Comparison: %s", contrast_name))
if(group2_var %in% colnames(comparison)) {
cat(sprintf(" | %s", group2_level))
}
cat(sprintf("\n n1 = %d, n2 = %d\n", length(data1), length(data2)))
cat(sprintf(" Cohen's d: %.5f\n", cohens_d_result$estimate))
cat(sprintf(" Effect size interpretation: %s\n", cohens_d_result$magnitude))
cat(sprintf(" p-value: %.5f\n", comparison$p.value))
cat("\n")
}
}
}
} else {
cat("No significant pairwise comparisons found.\n")
}
}
```
```{r interaction-effects}
# Note: These sections would need the actual simple effects results from your analysis
# The original script references undefined variables: temporal_time_simple and time_domain_simple
# These would need to be calculated using emmeans for simple effects
# 1. TEMPORAL_DO × TIME INTERACTION
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# 2. TIME × DOMAIN INTERACTION
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
```
# Interaction Plot
```{r interaction-plot}
# Define color palette for DOMAIN (4 levels)
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")
# Create estimated marginal means for DOMAIN x TIME
emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)
# Prepare emmeans data frame
emmeans_data2 <- emm_full %>%
as.data.frame() %>%
filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
rename(
ci_lower = lower.CL,
ci_upper = upper.CL,
plot_mean = emmean
) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels)
)
iPlot <- long_data_clean %>%
dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
mutate(
DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
TIME = factor(TIME, levels = time_levels)
)
# Plot without TEMPORAL_DO facet
interaction_plot2 <- ggplot() +
geom_point(
data = iPlot,
aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
alpha = 0.3, shape = 16
) +
geom_rect(
data = emmeans_data2,
aes(
xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
ymin = ci_lower, ymax = ci_upper,
fill = DOMAIN
),
color = "black", alpha = 0.5
) +
geom_segment(
data = emmeans_data2,
aes(
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
y = ci_lower, yend = ci_upper
),
color = "black"
) +
geom_point(
data = emmeans_data2,
aes(
x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
y = plot_mean,
color = DOMAIN,
shape = DOMAIN
),
size = 2.5, stroke = 0.8, fill = "black"
) +
labs(
x = "TIME", y = "Mean Difference",
title = "DOMAIN × TIME Interaction", subtitle = ""
) +
scale_color_manual(name = "DOMAIN", values = cbp1) +
scale_fill_manual(name = "DOMAIN", values = cbp1) +
scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 0, hjust = 0.5),
plot.title = element_text(size = 14, hjust = 0.5),
plot.subtitle = element_text(size = 12, hjust = 0.5)
)
print(interaction_plot2)
```

View File

@ -0,0 +1,660 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
# Data wrangling and plotting (dplyr, tidyr, ggplot2, ...)
library(tidyverse)
library(ez)       # ezANOVA for the mixed ANOVA
library(car)      # leveneTest for homogeneity of variance
library(nortest)  # For normality tests (ad.test)
library(emmeans)  # For post-hoc comparisons
library(purrr)    # For map functions
library(effsize)  # For Cohen's d calculations
library(ggplot2)  # For plotting
# Avoid scientific notation in printed results.
options(scipen = 999)
# Sum-to-zero contrasts, required for the Type III ANOVA requested later.
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): an absolute, user-specific setwd() is fragile when knitting
# (knitr evaluates chunks relative to the document directory); prefer
# relative paths or here::here() — confirm before changing.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Load the raw experiment data.
data <- read.csv("exp1.csv")
# The eight repeated-measure columns: one mean score per TIME x DOMAIN cell.
domain_suffixes <- c("pref", "pers", "val", "life")
required_vars <- c(paste0("NPast_mean_", domain_suffixes),
                   paste0("NFut_mean_", domain_suffixes))
# Lookup table translating each measure column to its TIME and DOMAIN labels.
domain_mapping <- data.frame(
  variable = required_vars,
  time = rep(c("Past", "Future"), each = 4),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)
```
```{r data-reshaping}
# Reshape from wide (one column per TIME x DOMAIN measure) to long format:
# one row per participant x TIME x DOMAIN observation.
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  # Attach the TIME/DOMAIN labels belonging to each original column name.
  left_join(domain_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Keep the analysis columns and drop rows with a missing response.
  # NOTE: because NAs are dropped here, the later missing-values chunk will
  # necessarily report zero missing observations.
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
# Alias used by all downstream analysis chunks.
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
# Descriptive statistics of MEAN_DIFFERENCE for every TIME x DOMAIN cell.
# Statistics are computed at full precision first, then rounded to five
# decimals in a single pass over the summary columns.
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE, na.rm = TRUE),
    variance = var(MEAN_DIFFERENCE, na.rm = TRUE),
    sd = sd(MEAN_DIFFERENCE, na.rm = TRUE),
    median = median(MEAN_DIFFERENCE, na.rm = TRUE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE),
    min = min(MEAN_DIFFERENCE, na.rm = TRUE),
    max = max(MEAN_DIFFERENCE, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(across(c(mean, variance, sd, median, q1, q3, min, max), ~ round(.x, 5)))
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
# Same cell descriptives, additionally split by the between-subjects
# factor TEMPORAL_DO (rounded to five decimals).
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
# Per-cell missingness summary.
# NOTE(review): long_data was already filtered with !is.na(MEAN_DIFFERENCE)
# in the data-reshaping chunk, so n_missing / pct_missing will always be 0
# here. To audit real missingness, run this check before that filter.
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    pct_missing = round(100 * n_missing / n_total, 2),
    .groups = 'drop'
  )
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
# Tukey-fence outlier count per TIME x DOMAIN cell (1.5 × IQR beyond Q1/Q3).
# dplyr evaluates summarise() expressions sequentially, so later columns
# (iqr, bounds, n_outliers) may reference the ones computed before them.
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    median = median(MEAN_DIFFERENCE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
# Anderson-Darling normality test within each TIME x DOMAIN cell.
# The .data pronoun makes the column reference explicit inside the data mask.
# (ad.test is run twice per cell — once for the statistic, once for the
# p-value — which is cheap here but could be collapsed into a single call.)
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
# Levene's test of variance homogeneity across TIME, run separately per
# DOMAIN. The formula is created inside summarise(), so its variables resolve
# against the grouped data mask. (leveneTest runs twice per group, once for
# each extracted column — harmless but redundant.)
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
# Levene's test of variance homogeneity across the four DOMAIN levels,
# run separately within each TIME level (same data-mask formula pattern
# as the previous chunk).
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Hartley's F-max ratio: largest group variance divided by the smallest.
calculate_hartley_ratio <- function(variances) {
  max(variances, na.rm = TRUE) / min(variances, na.rm = TRUE)
}

# Bootstrap critical values for Hartley's F-max test.
#
# data:         data frame (or tibble) with one grouping column and one
#               numeric response column
# group_var:    name of the grouping column (character scalar)
# response_var: name of the response column (character scalar)
# n_iter:       number of bootstrap resamples
#
# Returns a list with the observed F-max ratio, the bootstrap 95th-percentile
# critical value, and the number of valid bootstrap iterations.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get unique groups
  groups <- unique(data[[group_var]])
  # Calculate observed variances for each group
  observed_vars <- data %>%
    dplyr::group_by(!!rlang::sym(group_var)) %>%
    dplyr::summarise(var = var(!!rlang::sym(response_var), na.rm = TRUE), .groups = 'drop') %>%
    dplyr::pull(var)
  # Guard against zero/NA variances so the ratio below stays finite.
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }
  # Observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)
  # Extract each group's response values once, as a plain numeric vector.
  # BUG FIX: the original used data[data[[group_var]] == .x, response_var],
  # which returns a one-column tibble (not a vector) when `data` is a tibble
  # — as it is when called via pick() — making the subsequent sample() on it
  # fragile. Double-bracket extraction always yields an atomic vector.
  group_data_list <- map(groups, ~ {
    vals <- data[[response_var]][data[[group_var]] == .x]
    vals[!is.na(vals)]
  })
  # Resample each group independently and record the F-max ratio.
  for(i in 1:n_iter) {
    sample_vars <- map_dbl(group_data_list, ~ {
      bootstrap_sample <- sample(.x, size = length(.x), replace = TRUE)
      var(bootstrap_sample, na.rm = TRUE)
    })
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Drop non-finite ratios (e.g. produced by zero bootstrap variances).
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # 95th percentile of the bootstrap distribution = critical value.
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
```
```{r hartley-results}
# For mixed ANOVA: test variance homogeneity across the BETWEEN-SUBJECTS
# factor (TEMPORAL_DO) within each TIME × DOMAIN combination.
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
# Observed per-group variances and the F-max ratio per cell.
# NOTE: the level labels "01PAST"/"02FUT" are hard-coded; if the factor
# levels ever change, these subsets silently become empty (variance NA).
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Bootstrap critical value per cell; pick() hands the grouped slice of the
# two needed columns to the helper (pick() requires dplyr >= 1.1 — confirm
# the pinned dplyr version).
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```
# Mixed ANOVA Analysis
## Design Balance Check
```{r design-check}
# Count rows with no missing values in any column.
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Balance check: each participant (pID) should contribute exactly one
# observation to every TIME × DOMAIN cell.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
# BUG FIX: the original condition all(design_balance %in% c(0, 1)) also
# accepted empty cells (count 0), so a participant missing entire cells was
# still reported as "balanced". A balanced design requires every count == 1.
if(all(design_balance == 1)) {
  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
  print("Warning: Design is unbalanced")
  print(summary(as.vector(design_balance)))
}
```
## Mixed ANOVA with Sphericity Corrections
```{r mixed-anova}
# Mixed ANOVA via ezANOVA, which also returns Mauchly's test and GG/HF
# sphericity corrections for the within-subjects effects.
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 relies on the sum-to-zero contrasts set via options() in the
# libraries chunk.
mixed_anova_model <- ezANOVA(data = long_data_clean,
                             dv = MEAN_DIFFERENCE,
                             wid = pID,
                             between = TEMPORAL_DO,
                             within = .(TIME, DOMAIN),
                             type = 3,
                             detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
```
## Mauchly's Test for Sphericity
```{r mauchly-test}
# Significant rows indicate the sphericity assumption is violated for that
# within-subjects effect; corrections are reported in the next chunk.
print(mixed_anova_model$Mauchly)
```
## Sphericity-Corrected Results
```{r sphericity-corrections}
# Greenhouse-Geisser / Huynh-Feldt corrections are reported only when ezANOVA
# produced them (i.e. Mauchly's test was applicable).
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  # Build a table of original and epsilon-corrected degrees of freedom.
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)],
    Original_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)],
    GG_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[match(sphericity_corr$Effect, anova_table$Effect)] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Between-subjects effects (no sphericity corrections needed)
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]
      cat(sprintf("%s: F(%d, %d) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }
  # TIME main effect: only 2 levels, so sphericity holds trivially.
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%d, %d) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }
  # DOMAIN main effect (4 levels; see corrected_df for the adjusted DFs).
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%d, %d) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }
  # Effects listed in the corrections table, with GG- and HF-adjusted p-values.
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original: F(%d, %d) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # BUG FIX: print() on a string renders the escaped literal ("\nNote: ...")
  # with quotes instead of emitting a newline; cat() gives the intended output.
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
```
# Effect Sizes (Cohen's d)
## Main Effects
```{r cohens-d-main}
# aov model with the participant error stratum, used by emmeans below.
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)
# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
# CONSISTENCY FIX: use lowercase "bonferroni" as in every other pairs() call
# in this document (stats::p.adjust.methods are lowercase).
time_main_contrast <- pairs(time_emmeans, adjust = "bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Cohen's d for the TIME main effect, computed from the raw observations.
# NOTE(review): this pools repeated measures across participants and domains
# as if they were independent samples — confirm this is the intended variant.
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
```
```{r cohens-d-domain}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
# Bonferroni-adjusted pairwise contrasts among the four domains.
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Cohen's d (from raw pooled observations) for each significant contrast.
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    # Contrast labels look like "Level1 - Level2"; split to recover levels.
    # NOTE(review): assumes no domain name itself contains " - ".
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])
      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
      if(length(data1) > 0 && length(data2) > 0) {
        domain_cohens_d <- cohen.d(data1, data2)
        cat(sprintf("Comparison: %s\n", contrast_name))
        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
        cat(sprintf("  Cohen's d: %.5f\n", domain_cohens_d$estimate))
        cat(sprintf("  Effect size interpretation: %s\n", domain_cohens_d$magnitude))
        cat(sprintf("  p-value: %.5f\n", significant_domain$p.value[i]))
        cat("\n")
      }
    }
  }
}
```
## Two-Way Interactions
```{r cohens-d-function}
# Calculate and report Cohen's d for every significant (p < .05) row of an
# emmeans pairwise-contrast data frame, pulling raw observations from `data`.
#
# pairs_df:     data frame from as.data.frame(pairs(...)), with columns
#               `contrast` (labels like "A - B") and `p.value`
# data:         raw long-format data used to compute d
# group1_var:   column whose levels appear in the contrast labels
# group2_var:   conditioning column (used only if pairs_df carries it, i.e.
#               the contrasts were computed within levels of that variable)
# response_var: numeric response column
#
# Side effects: prints results via cat()/print(); no useful return value.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)
    cat("\nCohen's d calculated from raw data:\n")
    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)
      # Recover the two factor levels from the "A - B" label.
      # NOTE(review): assumes level names never contain " - ".
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])
        # If the contrast was conditioned on group2_var, restrict both
        # samples to that variable's level for this row.
        if(group2_var %in% colnames(comparison)) {
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]
          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)
          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
```
```{r interaction-effects}
# Note: These sections would need the actual simple effects results from your analysis
# The original script references undefined variables: temporal_time_simple and time_domain_simple
# These would need to be calculated using emmeans for simple effects
# 1. TEMPORAL_DO × TIME INTERACTION
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# 2. TIME × DOMAIN INTERACTION
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
```
# Interaction Plot
```{r interaction-plot}
# Color palette for DOMAIN. Only the first 4 of these 5 colors are used,
# since DOMAIN has 4 levels.
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")
# Estimated marginal means (with CIs) for every DOMAIN x TIME cell.
emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)
# Prepare emmeans data frame
emmeans_data2 <- emm_full %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )
# Raw observations for the jittered background points.
iPlot <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )
# Plot without TEMPORAL_DO facet. The term (as.numeric(DOMAIN) - 2.5) * 0.15
# is a manual dodge: it spreads the four domains symmetrically around each
# TIME position (offsets -0.225, -0.075, +0.075, +0.225).
interaction_plot2 <- ggplot() +
  # Raw data points, jitter-dodged to echo the manual dodge of the summaries.
  geom_point(
    data = iPlot,
    aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
    alpha = 0.3, shape = 16
  ) +
  # Confidence-interval boxes around each cell mean.
  geom_rect(
    data = emmeans_data2,
    aes(
      xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
      xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
      ymin = ci_lower, ymax = ci_upper,
      fill = DOMAIN
    ),
    color = "black", alpha = 0.5
  ) +
  # Vertical CI line through each box center.
  geom_segment(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      y = ci_lower, yend = ci_upper
    ),
    color = "black"
  ) +
  # Cell means; shapes 21-24 are fillable, hence the fill = "black" override.
  geom_point(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      y = plot_mean,
      color = DOMAIN,
      shape = DOMAIN
    ),
    size = 2.5, stroke = 0.8, fill = "black"
  ) +
  labs(
    x = "TIME", y = "Mean Difference",
    title = "DOMAIN × TIME Interaction", subtitle = ""
  ) +
  scale_color_manual(name = "DOMAIN", values = cbp1) +
  scale_fill_manual(name = "DOMAIN", values = cbp1) +
  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.title = element_text(size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5)
  )
print(interaction_plot2)
```

View File

@ -0,0 +1,660 @@
---
title: "Mixed ANOVA Analysis for Domain Means"
author: "Irina"
date: "`r Sys.Date()`"
output:
html_document:
toc: true
toc_float: true
code_folding: hide
theme: flatly
highlight: tango
fig_width: 10
fig_height: 6
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
```
# Introduction
This analysis examines domain-level differences in mean scores across time periods using a mixed ANOVA design. The analysis focuses on four domains (Preferences, Personality, Values, Life) across two time periods (Past, Future) with a between-subjects factor (TEMPORAL_DO).
# Data Preparation and Setup
```{r libraries}
# Analysis stack: tidyverse for wrangling/plots, ez for the mixed ANOVA,
# car for Levene's test
library(tidyverse)
library(ez)
library(car)
library(nortest) # For normality tests
library(emmeans) # For post-hoc comparisons
library(purrr) # For map functions
library(effsize) # For Cohen's d calculations
library(ggplot2) # For plotting
# No scientific notation in output; sum-to-zero contrasts so the Type III
# sums of squares requested later are valid
options(scipen = 999)
options(contrasts = c("contr.sum", "contr.poly"))
# NOTE(review): machine-specific absolute path — will fail on other machines
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
```
```{r data-loading}
# Load the wide-format experiment data
data <- read.csv("exp1.csv")
# The eight wide-format difference-score columns: 4 domains x 2 time frames
required_vars <- c("NPast_mean_pref", "NPast_mean_pers", "NPast_mean_val", "NPast_mean_life",
                   "NFut_mean_pref", "NFut_mean_pers", "NFut_mean_val", "NFut_mean_life")
# Lookup table mapping each wide column to its TIME and DOMAIN labels
domain_mapping <- data.frame(
  variable = required_vars,
  time = rep(c("Past", "Future"), each = 4),
  domain = rep(c("Preferences", "Personality", "Values", "Life"), 2),
  stringsAsFactors = FALSE
)
```
```{r data-reshaping}
# Reshape wide -> long: one row per participant x TIME x DOMAIN observation
long_data <- data %>%
  select(pID, ResponseId, TEMPORAL_DO, all_of(required_vars)) %>%
  pivot_longer(
    cols = all_of(required_vars),
    names_to = "variable",
    values_to = "MEAN_DIFFERENCE"
  ) %>%
  # Attach the TIME/DOMAIN labels defined in domain_mapping
  left_join(domain_mapping, by = "variable") %>%
  # Convert to factors with proper levels (note: columns are 'time' and 'domain' from mapping)
  mutate(
    TIME = factor(time, levels = c("Past", "Future")),
    DOMAIN = factor(domain, levels = c("Preferences", "Personality", "Values", "Life")),
    pID = as.factor(pID),
    TEMPORAL_DO = as.factor(TEMPORAL_DO)
  ) %>%
  # Select final columns and remove any rows with missing values
  select(pID, ResponseId, TEMPORAL_DO, TIME, DOMAIN, MEAN_DIFFERENCE) %>%
  filter(!is.na(MEAN_DIFFERENCE))
# Create clean dataset for analysis (fixing the reference issue)
# NOTE(review): long_data_clean is currently identical to long_data; the name
# exists so later chunks have a single canonical analysis dataset
long_data_clean <- long_data
```
# Descriptive Statistics
## Overall Descriptive Statistics by TIME and DOMAIN
```{r descriptive-stats}
# Cell-level descriptives for every TIME x DOMAIN combination.
# Statistics are computed first and all numeric summaries (everything from
# mean through max, i.e. not n) are rounded to 5 decimals in one pass.
desc_stats <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE, na.rm = TRUE),
    variance = var(MEAN_DIFFERENCE, na.rm = TRUE),
    sd = sd(MEAN_DIFFERENCE, na.rm = TRUE),
    median = median(MEAN_DIFFERENCE, na.rm = TRUE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25, na.rm = TRUE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75, na.rm = TRUE),
    min = min(MEAN_DIFFERENCE, na.rm = TRUE),
    max = max(MEAN_DIFFERENCE, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  mutate(across(mean:max, ~ round(.x, 5)))
print(desc_stats)
```
## Descriptive Statistics by Between-Subjects Factors
```{r descriptive-stats-temporal}
# Same descriptives, additionally split by the between-subjects factor
# TEMPORAL_DO (presentation-order condition)
desc_stats_by_temporal <- long_data %>%
  group_by(TEMPORAL_DO, TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = round(mean(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    variance = round(var(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    sd = round(sd(MEAN_DIFFERENCE, na.rm = TRUE), 5),
    .groups = 'drop'
  )
print(desc_stats_by_temporal)
```
# Assumption Testing
## Missing Values Check
```{r missing-values}
# Per-cell missingness report. Note: long_data has already been filtered with
# filter(!is.na(MEAN_DIFFERENCE)), so counts are expected to be zero; this
# chunk documents that fact in the rendered report.
missing_summary <- long_data %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n_total = n(),
    n_missing = sum(is.na(MEAN_DIFFERENCE)),
    .groups = 'drop'
  ) %>%
  mutate(pct_missing = round(100 * n_missing / n_total, 2))
print(missing_summary)
```
## Outlier Detection
```{r outlier-detection}
# Tukey 1.5*IQR fences per TIME x DOMAIN cell; summarise() allows later
# columns (iqr, bounds, n_outliers) to reference the ones computed before them
outlier_summary <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    mean = mean(MEAN_DIFFERENCE),
    sd = sd(MEAN_DIFFERENCE),
    q1 = quantile(MEAN_DIFFERENCE, 0.25),
    median = median(MEAN_DIFFERENCE),
    q3 = quantile(MEAN_DIFFERENCE, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 1.5 * iqr,
    upper_bound = q3 + 1.5 * iqr,
    n_outliers = sum(MEAN_DIFFERENCE < lower_bound | MEAN_DIFFERENCE > upper_bound),
    .groups = 'drop'
  )
print(outlier_summary)
```
## Anderson-Darling Normality Test
```{r normality-test}
# Anderson-Darling normality test within each TIME x DOMAIN cell.
# NOTE(review): ad.test() is evaluated twice per group (once for the statistic,
# once for the p-value); nortest's ad.test also requires more than 7
# observations per cell — confirm cell sizes before relying on this.
normality_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    n = n(),
    ad_statistic = ad.test(.data$MEAN_DIFFERENCE)$statistic,
    ad_p_value = ad.test(.data$MEAN_DIFFERENCE)$p.value,
    .groups = 'drop'
  )
print(normality_results)
```
## Homogeneity of Variance (Levene's Test)
### Test homogeneity across TIME within each DOMAIN
```{r homogeneity-time}
# Levene's test for equal variances across TIME, run separately per DOMAIN.
# NOTE(review): the formula has no data= argument — it resolves
# MEAN_DIFFERENCE and TIME through dplyr's data mask, and leveneTest is
# evaluated twice per group (once for F, once for p) — confirm this works
# under the installed dplyr version.
homogeneity_time <- long_data_clean %>%
  group_by(DOMAIN) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ TIME)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ TIME)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print(homogeneity_time)
```
### Test homogeneity across DOMAIN within each TIME
```{r homogeneity-domain}
# Levene's test for equal variances across DOMAIN, run separately per TIME
# (same data-mask caveat as the TIME-within-DOMAIN chunk above)
homogeneity_domain <- long_data_clean %>%
  group_by(TIME) %>%
  summarise(
    levene_F = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`F value`[1],
    levene_p = leveneTest(MEAN_DIFFERENCE ~ DOMAIN)$`Pr(>F)`[1],
    .groups = 'drop'
  )
print(homogeneity_domain)
```
## Hartley's F-Max Test with Bootstrap Critical Values
```{r hartley-function}
# Hartley's F-max statistic: ratio of the largest to the smallest group
# variance (NAs ignored). A ratio near 1 indicates homogeneous variances.
calculate_hartley_ratio <- function(variances) {
  largest <- max(variances, na.rm = TRUE)
  smallest <- min(variances, na.rm = TRUE)
  largest / smallest
}
# More efficient bootstrap function for Hartley's F-max test.
#
# Args:
#   data:         data frame (or tibble) holding the grouping and response columns
#   group_var:    name of the grouping column
#   response_var: name of the numeric response column
#   n_iter:       number of bootstrap resamples
#
# Returns a list with the observed F-max ratio, the bootstrap 95th-percentile
# critical value, and the number of valid bootstrap iterations.
#
# NOTE(fix): the original extracted group data with data[rows, response_var],
# which keeps a one-column tibble when `data` is a tibble (as produced by the
# dplyr::pick() call in the chunk that uses this function), so sample()/var()
# then operated on a data frame rather than a numeric vector. Columns are now
# extracted with [[ ]] first, which always yields a plain vector; the dplyr/
# purrr internals are replaced with base R equivalents with the same behavior.
bootstrap_hartley_critical <- function(data, group_var, response_var, n_iter = 1000) {
  # Get unique groups
  groups <- unique(data[[group_var]])
  # Response values per group, NAs dropped up front so the bootstrap below
  # never resamples a missing value
  group_data_list <- lapply(groups, function(g) {
    values <- data[[response_var]][data[[group_var]] == g]
    values[!is.na(values)]
  })
  # Observed per-group variances
  observed_vars <- vapply(group_data_list, function(v) var(v), numeric(1))
  # Guard against zero/undefined variances so the ratio stays finite
  if(any(observed_vars <= 0 | is.na(observed_vars))) {
    observed_vars[observed_vars <= 0 | is.na(observed_vars)] <- 1e-10
  }
  # Calculate observed F-max ratio
  observed_ratio <- max(observed_vars) / min(observed_vars)
  # Pre-allocate storage for bootstrap ratios
  bootstrap_ratios <- numeric(n_iter)
  for(i in 1:n_iter) {
    # Bootstrap sample from each group independently
    sample_vars <- vapply(group_data_list, function(v) {
      var(sample(v, size = length(v), replace = TRUE))
    }, numeric(1))
    bootstrap_ratios[i] <- max(sample_vars) / min(sample_vars)
  }
  # Remove invalid ratios (e.g. Inf when a resample has zero variance)
  valid_ratios <- bootstrap_ratios[is.finite(bootstrap_ratios) & !is.na(bootstrap_ratios)]
  if(length(valid_ratios) == 0) {
    stop("No valid bootstrap ratios generated")
  }
  # Critical value: 95th percentile of the bootstrap distribution
  critical_95 <- quantile(valid_ratios, 0.95, na.rm = TRUE)
  # Return only essential information
  return(list(
    observed_ratio = observed_ratio,
    critical_95 = critical_95,
    n_valid_iterations = length(valid_ratios)
  ))
}
```
```{r hartley-results}
# For mixed ANOVA: Test homogeneity across BETWEEN-SUBJECTS factor (TEMPORAL_DO)
# within each combination of within-subjects factors (TIME × DOMAIN)
# Diagnostic prints so the hardcoded level labels below can be verified
print(unique(long_data_clean$TEMPORAL_DO))
print(table(long_data_clean$TEMPORAL_DO))
# NOTE(review): assumes TEMPORAL_DO has exactly the levels "01PAST" and
# "02FUT"; any other labels silently produce NA variances
observed_temporal_ratios <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    # Calculate variances for each TEMPORAL_DO level within this TIME × DOMAIN combination
    past_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "01PAST"], na.rm = TRUE),
    fut_var = var(MEAN_DIFFERENCE[TEMPORAL_DO == "02FUT"], na.rm = TRUE),
    # Calculate F-max ratio
    f_max_ratio = max(past_var, fut_var) / min(past_var, fut_var),
    .groups = 'drop'
  ) %>%
  select(TIME, DOMAIN, past_var, fut_var, f_max_ratio)
print(observed_temporal_ratios)
# Hartley's F-max test across TEMPORAL_DO within each TIME × DOMAIN combination
# (bootstrap critical values; default n_iter = 1000 per cell)
set.seed(123) # For reproducibility
hartley_temporal_results <- long_data_clean %>%
  group_by(TIME, DOMAIN) %>%
  summarise(
    hartley_result = list(bootstrap_hartley_critical(pick(TEMPORAL_DO, MEAN_DIFFERENCE), "TEMPORAL_DO", "MEAN_DIFFERENCE")),
    .groups = 'drop'
  ) %>%
  # Unpack the per-cell result lists into plain numeric columns
  mutate(
    observed_ratio = map_dbl(hartley_result, ~ .x$observed_ratio),
    critical_95 = map_dbl(hartley_result, ~ .x$critical_95),
    significant = observed_ratio > critical_95
  ) %>%
  select(TIME, DOMAIN, observed_ratio, critical_95, significant)
print(hartley_temporal_results)
```
# Mixed ANOVA Analysis
## Design Balance Check
```{r design-check}
# Check for complete cases
complete_cases <- sum(complete.cases(long_data_clean))
print(complete_cases)
# Check if design is balanced
# Counts per participant x TIME x DOMAIN cell; all entries in {0, 1} means no
# participant contributes more than one observation to any cell.
# NOTE(review): the condition also passes when some cells are 0 (missing),
# in which case the "each participant has data for all combinations" message
# overstates the check — confirm whether all(design_balance == 1) was intended.
design_balance <- table(long_data_clean$pID, long_data_clean$TIME, long_data_clean$DOMAIN)
if(all(design_balance %in% c(0, 1))) {
  print("Design is balanced: each participant has data for all TIME × DOMAIN combinations")
} else {
  print("Warning: Design is unbalanced")
  print(summary(as.vector(design_balance)))
}
```
## Mixed ANOVA with Sphericity Corrections
```{r mixed-anova}
# Mixed ANOVA using ezANOVA with automatic sphericity corrections
# Between-subjects: TEMPORAL_DO (2 levels: 01PAST, 02FUT)
# Within-subjects: TIME (2 levels: Past, Future) × DOMAIN (4 levels: Preferences, Personality, Values, Life)
# type = 3 requests Type III sums of squares; valid with the sum-to-zero
# contrasts set in the setup chunk. detailed = TRUE keeps SSn/SSd columns.
mixed_anova_model <- ezANOVA(data = long_data_clean,
                             dv = MEAN_DIFFERENCE,
                             wid = pID,
                             between = TEMPORAL_DO,
                             within = .(TIME, DOMAIN),
                             type = 3,
                             detailed = TRUE)
anova_output <- mixed_anova_model$ANOVA
rownames(anova_output) <- NULL # Reset row numbers to be sequential
print(anova_output)
```
## Mauchly's Test for Sphericity
```{r mauchly-test}
print(mixed_anova_model$Mauchly)
```
## Sphericity-Corrected Results
```{r sphericity-corrections}
# Show sphericity-corrected results (Greenhouse-Geisser and Huynh-Feldt)
if(!is.null(mixed_anova_model$`Sphericity Corrections`)) {
  print(mixed_anova_model$`Sphericity Corrections`)
  # Extract and display corrected degrees of freedom
  sphericity_corr <- mixed_anova_model$`Sphericity Corrections`
  anova_table <- mixed_anova_model$ANOVA
  # Row positions of the corrected effects in the full ANOVA table, computed
  # once instead of repeating match() for every column
  idx <- match(sphericity_corr$Effect, anova_table$Effect)
  corrected_df <- data.frame(
    Effect = sphericity_corr$Effect,
    Original_DFn = anova_table$DFn[idx],
    Original_DFd = anova_table$DFd[idx],
    GG_DFn = anova_table$DFn[idx] * sphericity_corr$GGe,
    GG_DFd = anova_table$DFd[idx] * sphericity_corr$GGe,
    HF_DFn = anova_table$DFn[idx] * sphericity_corr$HFe,
    HF_DFd = anova_table$DFd[idx] * sphericity_corr$HFe,
    GG_epsilon = sphericity_corr$GGe,
    HF_epsilon = sphericity_corr$HFe
  )
  print(corrected_df)
  # Between-subjects effects (no sphericity corrections needed).
  # df placeholders use %g rather than %d: ezANOVA stores df as doubles and
  # R's sprintf("%d", ...) errors on any non-whole double.
  between_effects <- c("TEMPORAL_DO")
  for(effect in between_effects) {
    if(effect %in% anova_table$Effect) {
      f_value <- anova_table$F[anova_table$Effect == effect]
      dfn <- anova_table$DFn[anova_table$Effect == effect]
      dfd <- anova_table$DFd[anova_table$Effect == effect]
      p_value <- anova_table$p[anova_table$Effect == effect]
      cat(sprintf("%s: F(%g, %g) = %.3f, p = %.6f\n", effect, dfn, dfd, f_value, p_value))
    }
  }
  # Within-subjects effects (sphericity corrections where applicable)
  # TIME main effect (2 levels, sphericity automatically satisfied)
  if("TIME" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "TIME"]
    dfn <- anova_table$DFn[anova_table$Effect == "TIME"]
    dfd <- anova_table$DFd[anova_table$Effect == "TIME"]
    p_value <- anova_table$p[anova_table$Effect == "TIME"]
    cat(sprintf("TIME: F(%g, %g) = %.3f, p = %.6f (2 levels, sphericity satisfied)\n", dfn, dfd, f_value, p_value))
  }
  # DOMAIN main effect (4 levels, needs sphericity correction)
  if("DOMAIN" %in% anova_table$Effect) {
    f_value <- anova_table$F[anova_table$Effect == "DOMAIN"]
    dfn <- anova_table$DFn[anova_table$Effect == "DOMAIN"]
    dfd <- anova_table$DFd[anova_table$Effect == "DOMAIN"]
    p_value <- anova_table$p[anova_table$Effect == "DOMAIN"]
    cat(sprintf("DOMAIN: F(%g, %g) = %.3f, p = %.6f\n", dfn, dfd, f_value, p_value))
  }
  # Interactions with sphericity corrections
  for(i in seq_len(nrow(corrected_df))) {
    effect <- corrected_df$Effect[i]
    f_value <- anova_table$F[match(effect, anova_table$Effect)]
    cat(sprintf("\n%s:\n", effect))
    cat(sprintf("  Original: F(%g, %g) = %.3f\n",
                corrected_df$Original_DFn[i], corrected_df$Original_DFd[i], f_value))
    cat(sprintf("  GG-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$GG_DFn[i], corrected_df$GG_DFd[i], f_value, sphericity_corr$`p[GG]`[i]))
    cat(sprintf("  HF-corrected: F(%.2f, %.2f) = %.3f, p = %.6f\n",
                corrected_df$HF_DFn[i], corrected_df$HF_DFd[i], f_value, sphericity_corr$`p[HF]`[i]))
  }
} else {
  # NOTE(fix): cat(), not print() — print() on a string shows the literal
  # "\n" escape and surrounding quotes instead of producing a newline
  cat("\nNote: Sphericity corrections not needed (sphericity assumption met)\n")
}
```
# Effect Sizes (Cohen's d)
## Main Effects
```{r cohens-d-main}
# Create aov model for emmeans
# Univariate mixed-model aov with participant error strata; all emmeans
# contrasts below are computed from this model
aov_model <- aov(MEAN_DIFFERENCE ~ TEMPORAL_DO * TIME * DOMAIN + Error(pID/(TIME * DOMAIN)),
                 data = long_data_clean)
# Main Effect of TIME
time_emmeans <- emmeans(aov_model, ~ TIME)
print(time_emmeans)
time_main_contrast <- pairs(time_emmeans, adjust = "Bonferroni")
time_main_df <- as.data.frame(time_main_contrast)
print(time_main_df)
# Calculate Cohen's d for TIME main effect
if(nrow(time_main_df) > 0) {
  cat("\nCohen's d for TIME main effect:\n")
  time_past_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Past"]
  time_future_data <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$TIME == "Future"]
  # NOTE(review): cohen.d() here treats Past/Future scores as independent
  # samples although TIME is within-subject — confirm this is intended
  time_cohens_d <- cohen.d(time_past_data, time_future_data)
  cat(sprintf("Past vs Future: n1 = %d, n2 = %d\n", length(time_past_data), length(time_future_data)))
  cat(sprintf("Cohen's d: %.5f\n", time_cohens_d$estimate))
  cat(sprintf("Effect size interpretation: %s\n", time_cohens_d$magnitude))
  cat(sprintf("p-value: %.5f\n", time_main_df$p.value[1]))
}
```
```{r cohens-d-domain}
# Main Effect of DOMAIN (significant: p < 0.001)
domain_emmeans <- emmeans(aov_model, ~ DOMAIN)
print(domain_emmeans)
domain_main_contrast <- pairs(domain_emmeans, adjust = "bonferroni")
domain_main_df <- as.data.frame(domain_main_contrast)
print(domain_main_df)
# Calculate Cohen's d for significant DOMAIN contrasts
significant_domain <- domain_main_df[domain_main_df$p.value < 0.05, ]
if(nrow(significant_domain) > 0) {
  cat("\nCohen's d for significant DOMAIN contrasts:\n")
  for(i in seq_len(nrow(significant_domain))) {
    # emmeans contrast labels have the form "level1 - level2"
    contrast_name <- as.character(significant_domain$contrast[i])
    contrast_parts <- strsplit(contrast_name, " - ")[[1]]
    if(length(contrast_parts) == 2) {
      level1 <- trimws(contrast_parts[1])
      level2 <- trimws(contrast_parts[2])
      data1 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level1]
      data2 <- long_data_clean$MEAN_DIFFERENCE[long_data_clean$DOMAIN == level2]
      if(length(data1) > 0 && length(data2) > 0) {
        # Independent-samples d from the raw scores of each domain
        domain_cohens_d <- cohen.d(data1, data2)
        cat(sprintf("Comparison: %s\n", contrast_name))
        cat(sprintf("  n1 = %d, n2 = %d\n", length(data1), length(data2)))
        cat(sprintf("  Cohen's d: %.5f\n", domain_cohens_d$estimate))
        cat(sprintf("  Effect size interpretation: %s\n", domain_cohens_d$magnitude))
        cat(sprintf("  p-value: %.5f\n", significant_domain$p.value[i]))
        cat("\n")
      }
    }
  }
}
```
## Two-Way Interactions
```{r cohens-d-function}
# Function to calculate Cohen's d for pairwise comparisons.
# Given a pairs()/emmeans result converted to a data frame, reports Cohen's d
# (computed from the raw data) for every contrast with p < 0.05.
#   pairs_df:     data frame with 'contrast' ("A - B") and 'p.value' columns
#   data:         raw long-format data
#   group1_var:   factor whose levels appear in the contrast strings
#   group2_var:   conditioning factor; used only when it appears as a column
#                 of pairs_df (i.e. contrasts were computed within its levels)
#   response_var: name of the numeric response column
# NOTE(review): parsing splits on " - ", so a factor level containing " - "
# in its label would break the contrast parsing.
calculate_cohens_d_for_pairs <- function(pairs_df, data, group1_var, group2_var, response_var) {
  significant_pairs <- pairs_df[pairs_df$p.value < 0.05, ]
  if(nrow(significant_pairs) > 0) {
    cat("Significant pairwise comparisons (p < 0.05):\n")
    print(significant_pairs)
    cat("\nCohen's d calculated from raw data:\n")
    for(i in seq_len(nrow(significant_pairs))) {
      comparison <- significant_pairs[i, ]
      contrast_name <- as.character(comparison$contrast)
      # Parse the contrast
      contrast_parts <- strsplit(contrast_name, " - ")[[1]]
      if(length(contrast_parts) == 2) {
        level1 <- trimws(contrast_parts[1])
        level2 <- trimws(contrast_parts[2])
        # Get raw data for both conditions
        if(group2_var %in% colnames(comparison)) {
          # Simple-effect contrast: restrict both samples to the
          # conditioning level this row was computed within
          group2_level <- as.character(comparison[[group2_var]])
          data1 <- data[[response_var]][
            data[[group1_var]] == level1 &
            data[[group2_var]] == group2_level]
          data2 <- data[[response_var]][
            data[[group1_var]] == level2 &
            data[[group2_var]] == group2_level]
        } else {
          data1 <- data[[response_var]][data[[group1_var]] == level1]
          data2 <- data[[response_var]][data[[group1_var]] == level2]
        }
        if(length(data1) > 0 && length(data2) > 0) {
          # Calculate Cohen's d using effsize package
          cohens_d_result <- cohen.d(data1, data2)
          cat(sprintf("Comparison: %s", contrast_name))
          if(group2_var %in% colnames(comparison)) {
            cat(sprintf(" | %s", group2_level))
          }
          cat(sprintf("\n  n1 = %d, n2 = %d\n", length(data1), length(data2)))
          cat(sprintf("  Cohen's d: %.5f\n", cohens_d_result$estimate))
          cat(sprintf("  Effect size interpretation: %s\n", cohens_d_result$magnitude))
          cat(sprintf("  p-value: %.5f\n", comparison$p.value))
          cat("\n")
        }
      }
    }
  } else {
    cat("No significant pairwise comparisons found.\n")
  }
}
```
```{r interaction-effects}
# Note: These sections would need the actual simple effects results from your analysis
# The original script references undefined variables: temporal_time_simple and time_domain_simple
# These would need to be calculated using emmeans for simple effects
# 1. TEMPORAL_DO × TIME INTERACTION
# temporal_time_simple <- emmeans(aov_model, ~ TIME | TEMPORAL_DO)
# temporal_time_simple_df <- as.data.frame(pairs(temporal_time_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(temporal_time_simple_df, long_data_clean, "TIME", "TEMPORAL_DO", "MEAN_DIFFERENCE")
# 2. TIME × DOMAIN INTERACTION
# time_domain_simple <- emmeans(aov_model, ~ DOMAIN | TIME)
# time_domain_simple_df <- as.data.frame(pairs(time_domain_simple, adjust = "bonferroni"))
# calculate_cohens_d_for_pairs(time_domain_simple_df, long_data_clean, "DOMAIN", "TIME", "MEAN_DIFFERENCE")
```
# Interaction Plot
```{r interaction-plot}
# Define color palette for DOMAIN (4 levels)
# NOTE(review): five colours are supplied for four factor levels — the fifth
# ("#785EF0") is unused by the manual scales below
cbp1 <- c("#648FFF", "#DC267F", "#FFB000", "#FE6100", "#785EF0")
# Define TIME levels (Past, Future order)
time_levels <- c("Past", "Future")
# Create estimated marginal means for DOMAIN x TIME
emm_full <- emmeans(aov_model, ~ DOMAIN * TIME)
# Prepare emmeans data frame
# Keep only cells with a complete CI and rename to plotting-friendly names
emmeans_data2 <- emm_full %>%
  as.data.frame() %>%
  filter(!is.na(lower.CL) & !is.na(upper.CL) & !is.na(emmean)) %>%
  rename(
    ci_lower = lower.CL,
    ci_upper = upper.CL,
    plot_mean = emmean
  ) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )
# Raw per-participant points; factor levels must match emmeans_data2 so the
# manual dodge offsets below line up between layers
iPlot <- long_data_clean %>%
  dplyr::select(pID, DOMAIN, TIME, MEAN_DIFFERENCE) %>%
  mutate(
    DOMAIN = factor(DOMAIN, levels = c("Preferences", "Personality", "Values", "Life")),
    TIME = factor(TIME, levels = time_levels)
  )
# Plot without TEMPORAL_DO facet
# Layers, bottom to top: jittered raw points, CI rectangles, CI whisker
# segments, marginal-mean points. The (as.numeric(DOMAIN) - 2.5) * 0.15 term
# hand-dodges the four domains around each TIME position (2.5 is the midpoint
# of the domain codes 1..4).
interaction_plot2 <- ggplot() +
  geom_point(
    data = iPlot,
    aes(x = TIME, y = MEAN_DIFFERENCE, color = DOMAIN),
    position = position_jitterdodge(dodge.width = 0.75, jitter.width = 0.2),
    alpha = 0.3, shape = 16
  ) +
  geom_rect(
    data = emmeans_data2,
    aes(
      xmin = as.numeric(TIME) - 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
      xmax = as.numeric(TIME) + 0.08 + (as.numeric(DOMAIN) - 2.5) * 0.15,
      ymin = ci_lower, ymax = ci_upper,
      fill = DOMAIN
    ),
    color = "black", alpha = 0.5
  ) +
  geom_segment(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      xend = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      y = ci_lower, yend = ci_upper
    ),
    color = "black"
  ) +
  geom_point(
    data = emmeans_data2,
    aes(
      x = as.numeric(TIME) + (as.numeric(DOMAIN) - 2.5) * 0.15,
      y = plot_mean,
      color = DOMAIN,
      shape = DOMAIN
    ),
    size = 2.5, stroke = 0.8, fill = "black"
  ) +
  labs(
    x = "TIME", y = "Mean Difference",
    title = "DOMAIN × TIME Interaction", subtitle = ""
  ) +
  scale_color_manual(name = "DOMAIN", values = cbp1) +
  scale_fill_manual(name = "DOMAIN", values = cbp1) +
  # Shapes 21-24 are fillable; the fill/colour scales apply to them
  scale_shape_manual(name = "DOMAIN", values = c(21, 22, 23, 24)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    plot.title = element_text(size = 14, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5)
  )
print(interaction_plot2)
```

View File

@ -0,0 +1,15 @@
# Setup for the education/EHI regression: no scientific notation in output
options(scipen = 999)
# Regression utilities: car (diagnostics), lmtest + sandwich (robust SEs),
# stargazer (tables). NOTE(fix): lmtest was loaded twice in the original.
library(dplyr)
library(car)
library(lmtest)
library(stargazer)
library(sandwich)
# NOTE(review): machine-specific absolute path — will fail on other machines
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep only the three variables used in this analysis
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_edu)

View File

@ -0,0 +1,162 @@
# Assumption Checks Before Cronbach's Alpha Analysis
# Run this BEFORE the main reliability analysis
# NOTE(review): psych/corrplot/ggplot2 are loaded but not called in this
# file's visible code — presumably used by the follow-up reliability script
library(psych)
library(corrplot)
library(ggplot2)
# Read the data
# NOTE(review): relies on the working directory containing exp1.csv
data <- read.csv("exp1.csv")
# Define scale variables
# Five Past-frame difference-score items per scale, one block per domain
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
                    "NPastDiff_pref_nap", "NPastDiff_pref_travel")
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
                    "NPastDiff_pers_anxious", "NPastDiff_pers_complex")
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
                   "NPastDiff_val_performance", "NPastDiff_val_justice")
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
                    "NPastDiff_life_important", "NPastDiff_life_change")
# Function to check assumptions for a scale.
# Prints a console report of the checks to run before computing Cronbach's
# alpha on the items in `var_names`: sample size, missing data, normality,
# inter-item correlations, item-variance homogeneity, multivariate outliers,
# and a summary verdict.
# Returns TRUE when all checks ran, FALSE when the sample is too small.
# NOTE(fix): the original built divider lines with "="*60 — a Python idiom
# that is a runtime error in R ("*" is numeric-only); strrep() is used instead.
check_assumptions <- function(data, var_names, scale_name) {
  rule <- strrep("=", 60)
  cat("\n", rule, "\n")
  cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
  cat(rule, "\n")
  # Get scale data
  scale_data <- data[, var_names]
  # 1. Sample size check: alpha estimates are unstable in tiny samples
  complete_cases <- sum(complete.cases(scale_data))
  cat("1. SAMPLE SIZE CHECK:\n")
  cat("   Total participants:", nrow(data), "\n")
  cat("   Complete cases:", complete_cases, "\n")
  cat("   Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
  if(complete_cases < 30) {
    cat("   WARNING: Sample size too small for reliable alpha estimates\n")
    return(FALSE)
  }
  # 2. Missing data check (per item, as % of all rows)
  cat("\n2. MISSING DATA CHECK:\n")
  missing_counts <- colSums(is.na(scale_data))
  missing_pct <- round(missing_counts / nrow(data) * 100, 2)
  cat("   Missing data by item:\n")
  for(i in seq_along(var_names)) {
    cat("     ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
  }
  max_missing <- max(missing_pct)
  cat("   Maximum missing:", max_missing, "%\n")
  cat("   Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
  # 3. Use only complete cases for remaining checks
  complete_data <- scale_data[complete.cases(scale_data), ]
  # 4. Normality check (Shapiro-Wilk test on first item as example)
  cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
  if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
    shapiro_result <- shapiro.test(complete_data[, 1])
    cat("   p-value:", round(shapiro_result$p.value, 4), "\n")
    cat("   Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
  } else {
    cat("   Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
  }
  # 5. Inter-item correlations: items on one scale should correlate positively
  cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
  cor_matrix <- cor(complete_data)
  # Keep each item pair once: blank the lower triangle and the diagonal
  cor_matrix[lower.tri(cor_matrix)] <- NA
  diag(cor_matrix) <- NA
  cors <- as.vector(cor_matrix)
  cors <- cors[!is.na(cors)]
  positive_cors <- sum(cors > 0)
  strong_cors <- sum(cors > 0.30)
  negative_cors <- sum(cors < 0)
  cat("   Total correlations:", length(cors), "\n")
  cat("   Positive correlations:", positive_cors, "\n")
  cat("   Strong correlations (>0.30):", strong_cors, "\n")
  cat("   Negative correlations:", negative_cors, "\n")
  cat("   Mean correlation:", round(mean(cors), 4), "\n")
  cat("   Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
  if(negative_cors > 0) {
    cat("   ⚠️ WARNING: Negative correlations suggest potential issues\n")
  }
  if(strong_cors / length(cors) < 0.5) {
    cat("   ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
  }
  # 6. Item variance check: no item's variance should dominate the scale
  cat("\n5. ITEM VARIANCE CHECK:\n")
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat("   Item variances:", round(item_vars, 4), "\n")
  cat("   Variance ratio (max/min):", round(var_ratio, 4), "\n")
  cat("   Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
  # 7. Outlier check
  cat("\n6. OUTLIER CHECK:\n")
  # Multivariate outliers via Mahalanobis distance; requires n > p so the
  # covariance matrix is invertible
  if(nrow(complete_data) > ncol(complete_data)) {
    mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
    outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
    outliers <- sum(mahal_dist > outlier_threshold)
    cat("   Multivariate outliers (p<0.001):", outliers, "\n")
    cat("   Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
  }
  # 8. Summary recommendation: tally the violated assumptions
  cat("\n7. OVERALL RECOMMENDATION:\n")
  issues <- 0
  if(complete_cases < 30) issues <- issues + 1
  if(max_missing >= 20) issues <- issues + 1
  if(negative_cors > 0) issues <- issues + 1
  if(var_ratio >= 4) issues <- issues + 1
  if(issues == 0) {
    cat("   ✓ PROCEED with Cronbach's alpha analysis\n")
  } else if(issues <= 2) {
    cat("   ⚠️ PROCEED with CAUTION - some assumptions violated\n")
  } else {
    cat("   ✗ CONSIDER alternatives or data cleaning before proceeding\n")
  }
  return(TRUE)
}
# Check assumptions for all past scales
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
cat("\nData: exp1.csv")
cat("\nTotal sample size:", nrow(data))
check_assumptions(data, past_pref_vars, "Past Preferences")
check_assumptions(data, past_pers_vars, "Past Personality")
check_assumptions(data, past_val_vars, "Past Values")
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
# Quick check of future scales (you can expand this)
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
                   "NFutDiff_pref_nap", "NFutDiff_pref_travel")
check_assumptions(data, fut_pref_vars, "Future Preferences")
# NOTE(fix): the original wrote "="*60 here — a runtime error in R; the
# divider is built with strrep() instead.
divider <- strrep("=", 60)
cat("\n", divider, "\n")
cat("GENERAL GUIDELINES:\n")
cat(divider, "\n")
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
cat("⚠️ If some assumptions are violated, interpret with caution\n")
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
cat("   - Omega coefficient (more robust to violations)\n")
cat("   - Split-half reliability\n")
cat("   - Test-retest reliability\n")
cat("   - Factor analysis to check dimensionality\n")

View File

@ -0,0 +1,162 @@
# Assumption Checks Before Cronbach's Alpha Analysis
# Run this BEFORE the main reliability analysis
# NOTE(review): psych/corrplot/ggplot2 are loaded but not called in this
# file's visible code — presumably used by the follow-up reliability script
library(psych)
library(corrplot)
library(ggplot2)
# Read the data
# NOTE(review): relies on the working directory containing exp1.csv
data <- read.csv("exp1.csv")
# Define scale variables
# Five Past-frame difference-score items per scale, one block per domain
past_pref_vars <- c("NPastDiff_pref_read", "NPastDiff_pref_music", "NPastDiff_pref_tv",
                    "NPastDiff_pref_nap", "NPastDiff_pref_travel")
past_pers_vars <- c("NPastDiff_pers_extravert", "NPastDiff_pers_critical", "NPastDiff_pers_dependable",
                    "NPastDiff_pers_anxious", "NPastDiff_pers_complex")
past_val_vars <- c("NPastDiff_val_obey", "NPastDiff_val_trad", "NPastDiff_val_opinion",
                   "NPastDiff_val_performance", "NPastDiff_val_justice")
past_life_vars <- c("NPastDiff_life_ideal", "NPastDiff_life_excellent", "NPastDiff_life_satisfied",
                    "NPastDiff_life_important", "NPastDiff_life_change")
# Function to check assumptions for a scale.
# Prints a console report of the checks to run before computing Cronbach's
# alpha on the items in `var_names`: sample size, missing data, normality,
# inter-item correlations, item-variance homogeneity, multivariate outliers,
# and a summary verdict.
# Returns TRUE when all checks ran, FALSE when the sample is too small.
# NOTE(fix): the original built divider lines with "="*60 — a Python idiom
# that is a runtime error in R ("*" is numeric-only); strrep() is used instead.
check_assumptions <- function(data, var_names, scale_name) {
  rule <- strrep("=", 60)
  cat("\n", rule, "\n")
  cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
  cat(rule, "\n")
  # Get scale data
  scale_data <- data[, var_names]
  # 1. Sample size check: alpha estimates are unstable in tiny samples
  complete_cases <- sum(complete.cases(scale_data))
  cat("1. SAMPLE SIZE CHECK:\n")
  cat("   Total participants:", nrow(data), "\n")
  cat("   Complete cases:", complete_cases, "\n")
  cat("   Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
  if(complete_cases < 30) {
    cat("   WARNING: Sample size too small for reliable alpha estimates\n")
    return(FALSE)
  }
  # 2. Missing data check (per item, as % of all rows)
  cat("\n2. MISSING DATA CHECK:\n")
  missing_counts <- colSums(is.na(scale_data))
  missing_pct <- round(missing_counts / nrow(data) * 100, 2)
  cat("   Missing data by item:\n")
  for(i in seq_along(var_names)) {
    cat("     ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
  }
  max_missing <- max(missing_pct)
  cat("   Maximum missing:", max_missing, "%\n")
  cat("   Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
  # 3. Use only complete cases for remaining checks
  complete_data <- scale_data[complete.cases(scale_data), ]
  # 4. Normality check (Shapiro-Wilk test on first item as example)
  cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
  if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
    shapiro_result <- shapiro.test(complete_data[, 1])
    cat("   p-value:", round(shapiro_result$p.value, 4), "\n")
    cat("   Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
  } else {
    cat("   Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
  }
  # 5. Inter-item correlations: items on one scale should correlate positively
  cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
  cor_matrix <- cor(complete_data)
  # Keep each item pair once: blank the lower triangle and the diagonal
  cor_matrix[lower.tri(cor_matrix)] <- NA
  diag(cor_matrix) <- NA
  cors <- as.vector(cor_matrix)
  cors <- cors[!is.na(cors)]
  positive_cors <- sum(cors > 0)
  strong_cors <- sum(cors > 0.30)
  negative_cors <- sum(cors < 0)
  cat("   Total correlations:", length(cors), "\n")
  cat("   Positive correlations:", positive_cors, "\n")
  cat("   Strong correlations (>0.30):", strong_cors, "\n")
  cat("   Negative correlations:", negative_cors, "\n")
  cat("   Mean correlation:", round(mean(cors), 4), "\n")
  cat("   Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
  if(negative_cors > 0) {
    cat("   ⚠️ WARNING: Negative correlations suggest potential issues\n")
  }
  if(strong_cors / length(cors) < 0.5) {
    cat("   ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
  }
  # 6. Item variance check: no item's variance should dominate the scale
  cat("\n5. ITEM VARIANCE CHECK:\n")
  item_vars <- apply(complete_data, 2, var)
  var_ratio <- max(item_vars) / min(item_vars)
  cat("   Item variances:", round(item_vars, 4), "\n")
  cat("   Variance ratio (max/min):", round(var_ratio, 4), "\n")
  cat("   Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
  # 7. Outlier check
  cat("\n6. OUTLIER CHECK:\n")
  # Multivariate outliers via Mahalanobis distance; requires n > p so the
  # covariance matrix is invertible
  if(nrow(complete_data) > ncol(complete_data)) {
    mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
    outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
    outliers <- sum(mahal_dist > outlier_threshold)
    cat("   Multivariate outliers (p<0.001):", outliers, "\n")
    cat("   Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
  }
  # 8. Summary recommendation: tally the violated assumptions
  cat("\n7. OVERALL RECOMMENDATION:\n")
  issues <- 0
  if(complete_cases < 30) issues <- issues + 1
  if(max_missing >= 20) issues <- issues + 1
  if(negative_cors > 0) issues <- issues + 1
  if(var_ratio >= 4) issues <- issues + 1
  if(issues == 0) {
    cat("   ✓ PROCEED with Cronbach's alpha analysis\n")
  } else if(issues <= 2) {
    cat("   ⚠️ PROCEED with CAUTION - some assumptions violated\n")
  } else {
    cat("   ✗ CONSIDER alternatives or data cleaning before proceeding\n")
  }
  return(TRUE)
}
# Check assumptions for all past scales
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
cat("\nData: exp1.csv")
cat("\nTotal sample size:", nrow(data))
check_assumptions(data, past_pref_vars, "Past Preferences")
check_assumptions(data, past_pers_vars, "Past Personality")
check_assumptions(data, past_val_vars, "Past Values")
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
# Quick check of future scales (you can expand this)
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
                   "NFutDiff_pref_nap", "NFutDiff_pref_travel")
check_assumptions(data, fut_pref_vars, "Future Preferences")
# NOTE(fix): the original wrote "="*60 here — a runtime error in R; the
# divider is built with strrep() instead.
divider <- strrep("=", 60)
cat("\n", divider, "\n")
cat("GENERAL GUIDELINES:\n")
cat(divider, "\n")
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
cat("⚠️ If some assumptions are violated, interpret with caution\n")
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
cat("   - Omega coefficient (more robust to violations)\n")
cat("   - Split-half reliability\n")
cat("   - Test-retest reliability\n")
cat("   - Factor analysis to check dimensionality\n")

View File

@ -0,0 +1,164 @@
# ---- Assumption checks to run BEFORE the Cronbach's alpha analysis ----
library(psych)
library(corrplot)
library(ggplot2)

# Load the experiment data.
data <- read.csv("exp1.csv")

# Item names for each "past self" scale: a shared stem plus five item suffixes.
past_pref_vars <- paste0("NPastDiff_pref_", c("read", "music", "tv", "nap", "travel"))
past_pers_vars <- paste0("NPastDiff_pers_", c("extravert", "critical", "dependable",
                                              "anxious", "complex"))
past_val_vars  <- paste0("NPastDiff_val_",  c("obey", "trad", "opinion",
                                              "performance", "justice"))
past_life_vars <- paste0("NPastDiff_life_", c("ideal", "excellent", "satisfied",
                                              "important", "change"))
check_assumptions <- function(data, var_names, scale_name) {
cat("\n", "="*60, "\n")
cat("ASSUMPTION CHECKS FOR:", scale_name, "\n")
cat("="*60, "\n")
# Get scale data
scale_data <- data[, var_names]
# 1. Sample size check
complete_cases <- sum(complete.cases(scale_data))
cat("1. SAMPLE SIZE CHECK:\n")
cat(" Total participants:", nrow(data), "\n")
cat(" Complete cases:", complete_cases, "\n")
cat(" Adequate (≥30)?", ifelse(complete_cases >= 30, "✓ YES", "✗ NO"), "\n")
if(complete_cases < 30) {
cat(" WARNING: Sample size too small for reliable alpha estimates\n")
return(FALSE)
}
# 2. Missing data check
cat("\n2. MISSING DATA CHECK:\n")
missing_counts <- colSums(is.na(scale_data))
missing_pct <- round(missing_counts / nrow(data) * 100, 2)
cat(" Missing data by item:\n")
for(i in 1:length(var_names)) {
cat(" ", var_names[i], ":", missing_counts[i], "(", missing_pct[i], "%)\n")
}
max_missing <- max(missing_pct)
cat(" Maximum missing:", max_missing, "%\n")
cat(" Acceptable (<20%)?", ifelse(max_missing < 20, "✓ YES", "✗ NO"), "\n")
# 3. Use only complete cases for remaining checks
complete_data <- scale_data[complete.cases(scale_data), ]
# 4. Normality check (Shapiro-Wilk test on first item as example)
cat("\n3. NORMALITY CHECK (Shapiro-Wilk test on first item):\n")
if(nrow(complete_data) <= 5000) { # Shapiro-Wilk has sample size limit
shapiro_result <- shapiro.test(complete_data[, 1])
cat(" p-value:", round(shapiro_result$p.value, 4), "\n")
cat(" Normal?", ifelse(shapiro_result$p.value > 0.05, "✓ YES", "✗ NO (but alpha is robust)"), "\n")
} else {
cat(" Sample too large for Shapiro-Wilk test (alpha is robust to non-normality)\n")
}
# 5. Inter-item correlations check
cat("\n4. INTER-ITEM CORRELATIONS CHECK:\n")
cor_matrix <- cor(complete_data)
# Get off-diagonal correlations
cor_matrix[lower.tri(cor_matrix)] <- NA
diag(cor_matrix) <- NA
cors <- as.vector(cor_matrix)
cors <- cors[!is.na(cors)]
positive_cors <- sum(cors > 0)
strong_cors <- sum(cors > 0.30)
negative_cors <- sum(cors < 0)
cat(" Total correlations:", length(cors), "\n")
cat(" Positive correlations:", positive_cors, "\n")
cat(" Strong correlations (>0.30):", strong_cors, "\n")
cat(" Negative correlations:", negative_cors, "\n")
cat(" Mean correlation:", round(mean(cors), 4), "\n")
cat(" Range:", round(min(cors), 4), "to", round(max(cors), 4), "\n")
if(negative_cors > 0) {
cat(" ⚠️ WARNING: Negative correlations suggest potential issues\n")
}
if(strong_cors / length(cors) < 0.5) {
cat(" ⚠️ WARNING: Many weak correlations may indicate poor scale coherence\n")
}
# 6. Item variance check
cat("\n5. ITEM VARIANCE CHECK:\n")
item_vars <- apply(complete_data, 2, var)
var_ratio <- max(item_vars) / min(item_vars)
cat(" Item variances:", round(item_vars, 4), "\n")
cat(" Variance ratio (max/min):", round(var_ratio, 4), "\n")
cat(" Acceptable (<4:1)?", ifelse(var_ratio < 4, "✓ YES", "✗ NO"), "\n")
# 7. Outlier check
cat("\n6. OUTLIER CHECK:\n")
# Check for multivariate outliers using Mahalanobis distance
if(nrow(complete_data) > ncol(complete_data)) {
mahal_dist <- mahalanobis(complete_data, colMeans(complete_data), cov(complete_data))
outlier_threshold <- qchisq(0.999, df = ncol(complete_data))
outliers <- sum(mahal_dist > outlier_threshold)
cat(" Multivariate outliers (p<0.001):", outliers, "\n")
cat(" Acceptable (<5%)?", ifelse(outliers/nrow(complete_data) < 0.05, "✓ YES", "✗ NO"), "\n")
}
# 8. Summary recommendation
cat("\n7. OVERALL RECOMMENDATION:\n")
issues <- 0
if(complete_cases < 30) issues <- issues + 1
if(max_missing >= 20) issues <- issues + 1
if(negative_cors > 0) issues <- issues + 1
if(var_ratio >= 4) issues <- issues + 1
if(issues == 0) {
cat(" ✓ PROCEED with Cronbach's alpha analysis\n")
} else if(issues <= 2) {
cat(" ⚠️ PROCEED with CAUTION - some assumptions violated\n")
} else {
cat(" ✗ CONSIDER alternatives or data cleaning before proceeding\n")
}
return(TRUE)
}
# Check assumptions for all past scales
cat("CRONBACH'S ALPHA ASSUMPTION CHECKS")
cat("\nData: exp1.csv")
cat("\nTotal sample size:", nrow(data))
check_assumptions(data, past_pref_vars, "Past Preferences")
check_assumptions(data, past_pers_vars, "Past Personality")
check_assumptions(data, past_val_vars, "Past Values")
check_assumptions(data, past_life_vars, "Past Life Satisfaction")
# Quick check of future scales (you can expand this)
fut_pref_vars <- c("NFutDiff_pref_read", "NFutDiff_pref_music", "NFutDiff_pref_tv",
                   "NFutDiff_pref_nap", "NFutDiff_pref_travel")
check_assumptions(data, fut_pref_vars, "Future Preferences")
# FIX: "="*60 is not valid R -- `*` on character operands raises
# "non-numeric argument to binary operator". strrep() builds the divider.
cat("\n", strrep("=", 60), "\n")
cat("GENERAL GUIDELINES:\n")
cat(strrep("=", 60), "\n")
cat("✓ If most assumptions are met, Cronbach's alpha is appropriate\n")
cat("⚠️ If some assumptions are violated, interpret with caution\n")
cat("✗ If many assumptions are violated, consider alternative approaches:\n")
cat("  - Omega coefficient (more robust to violations)\n")
cat("  - Split-half reliability\n")
cat("  - Test-retest reliability\n")
cat("  - Factor analysis to check dimensionality\n")

View File

@ -0,0 +1,82 @@
' Return the 1-based column index of headerName in row 1 of ws, or 0 if absent.
' FIX: the original assigned Application.Match directly to the Long return and
' then tested IsError(GetCol) -- a Long can never hold an error value, so the
' check was dead code and the On Error Resume Next silently masked the failed
' assignment. Capture the Match result in a Variant first (Application.Match,
' unlike WorksheetFunction.Match, returns an error Variant instead of raising).
Function GetCol(headerName As String, ws As Worksheet) As Long
    Dim lastCol As Long
    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    Dim m As Variant
    m = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol)), 0)
    If IsError(m) Then
        GetCol = 0
    Else
        GetCol = CLng(m)
    End If
End Function
' Helper for brierVARS: for each (answer, confidence) column pair in pairVars,
' write the Brier score into the target column whose header shares the item's
' prefix (the text before the first underscore). outcome is the item's true
' state: 0 for objectively false items, 1 for objectively true items.
Private Sub WriteBrierScores(ByVal ws As Worksheet, ByVal pairVars As Variant, ByVal targetHeaders As Variant, ByVal outcome As Double, ByVal rowCount As Long)
    Dim i As Long, r As Long, j As Long
    Dim srcVar As String, itemKey As String, checkVal As String
    Dim colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim conf As Double
    Dim result As Variant
    For i = 0 To UBound(pairVars) Step 2
        srcVar = pairVars(i)
        ' Item key, e.g. "moza" from "moza_55_F_1".
        itemKey = Left(srcVar, InStr(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(pairVars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' PERF: the target column depends only on the item, not the row,
            ' so resolve it once per item (the original redid this per row).
            colTarget = 0
            For j = 0 To UBound(targetHeaders)
                If Left(targetHeaders(j), InStr(targetHeaders(j), "_") - 1) = itemKey Then
                    colTarget = GetCol(targetHeaders(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    checkVal = ws.Cells(r, colSource1).Value
                    conf = ws.Cells(r, colSourceCON).Value
                    ' Probability assigned to "item is true" is conf/100 for a
                    ' TRUE answer and 1 - conf/100 for a FALSE answer.
                    If checkVal = "TRUE" Then
                        result = ((conf / 100) - outcome) ^ 2
                    ElseIf checkVal = "FALSE" Then
                        result = ((1 - (conf / 100)) - outcome) ^ 2
                    Else
                        result = CVErr(xlErrNA) ' missing/unexpected answer
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

' Compute Brier scores for every trivia item and write them into the matching
' *_ex / *_easy / *_hard target columns on the first worksheet.
Sub brierVARS()
    ' (answer, confidence) column-name pairs for the objectively FALSE items.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    ' (answer, confidence) column-name pairs for the objectively TRUE items.
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    ' Output columns, matched to items by their leading prefix.
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' False items score against outcome 0; true items against outcome 1.
    WriteBrierScores ws, false_vars, target_headers, 0#, rowCount
    WriteBrierScores ws, true_vars, target_headers, 1#, rowCount
End Sub

View File

@ -0,0 +1,87 @@
' Locate headerName in the header row (row 1) of ws.
' Returns the 1-based column index, or 0 when the header is not present.
Function GetCol(headerName As String, ws As Worksheet) As Long
    Dim headerSpan As Long
    headerSpan = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    On Error Resume Next
    Dim hit As Variant
    hit = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, headerSpan)), 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant when there is no match.
    If Not IsError(hit) Then
        GetCol = CLng(hit)
    Else
        GetCol = 0
    End If
End Function
' Helper for brierVARS: for each (answer, confidence) column pair in pairVars,
' write the Brier score into the target column whose header shares the item's
' prefix (the text before the first underscore). outcome is the item's true
' state: 0 for objectively false items, 1 for objectively true items.
Private Sub WriteBrierScores(ByVal ws As Worksheet, ByVal pairVars As Variant, ByVal targetHeaders As Variant, ByVal outcome As Double, ByVal rowCount As Long)
    Dim i As Long, r As Long, j As Long
    Dim srcVar As String, itemKey As String, checkVal As String
    Dim colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim conf As Double
    Dim result As Variant
    For i = 0 To UBound(pairVars) Step 2
        srcVar = pairVars(i)
        ' Item key, e.g. "moza" from "moza_55_F_1".
        itemKey = Left(srcVar, InStr(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(pairVars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' PERF: the target column depends only on the item, not the row,
            ' so resolve it once per item (the original redid this per row).
            colTarget = 0
            For j = 0 To UBound(targetHeaders)
                If Left(targetHeaders(j), InStr(targetHeaders(j), "_") - 1) = itemKey Then
                    colTarget = GetCol(targetHeaders(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    checkVal = ws.Cells(r, colSource1).Value
                    conf = ws.Cells(r, colSourceCON).Value
                    ' Probability assigned to "item is true" is conf/100 for a
                    ' TRUE answer and 1 - conf/100 for a FALSE answer.
                    If checkVal = "TRUE" Then
                        result = ((conf / 100) - outcome) ^ 2
                    ElseIf checkVal = "FALSE" Then
                        result = ((1 - (conf / 100)) - outcome) ^ 2
                    Else
                        result = CVErr(xlErrNA) ' missing/unexpected answer
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

' Compute Brier scores for every trivia item and write them into the matching
' *_ex / *_easy / *_hard target columns on the first worksheet.
Sub brierVARS()
    ' (answer, confidence) column-name pairs for the objectively FALSE items.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    ' (answer, confidence) column-name pairs for the objectively TRUE items.
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    ' Output columns, matched to items by their leading prefix.
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' False items score against outcome 0; true items against outcome 1.
    WriteBrierScores ws, false_vars, target_headers, 0#, rowCount
    WriteBrierScores ws, true_vars, target_headers, 1#, rowCount
End Sub

View File

@ -0,0 +1,87 @@
' Locate headerName in the header row (row 1) of ws.
' Returns the 1-based column index, or 0 when the header is not present.
Function GetCol(headerName As String, ws As Worksheet) As Long
    Dim headerSpan As Long
    headerSpan = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    On Error Resume Next
    Dim hit As Variant
    hit = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, headerSpan)), 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant when there is no match.
    If Not IsError(hit) Then
        GetCol = CLng(hit)
    Else
        GetCol = 0
    End If
End Function
' Helper for brierVARS: for each (answer, confidence) column pair in pairVars,
' write the Brier score into the target column whose header shares the item's
' prefix (the text before the first underscore). outcome is the item's true
' state: 0 for objectively false items, 1 for objectively true items.
Private Sub WriteBrierScores(ByVal ws As Worksheet, ByVal pairVars As Variant, ByVal targetHeaders As Variant, ByVal outcome As Double, ByVal rowCount As Long)
    Dim i As Long, r As Long, j As Long
    Dim srcVar As String, itemKey As String, checkVal As String
    Dim colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim conf As Double
    Dim result As Variant
    For i = 0 To UBound(pairVars) Step 2
        srcVar = pairVars(i)
        ' Item key, e.g. "moza" from "moza_55_F_1".
        itemKey = Left(srcVar, InStr(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(pairVars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' PERF: the target column depends only on the item, not the row,
            ' so resolve it once per item (the original redid this per row).
            colTarget = 0
            For j = 0 To UBound(targetHeaders)
                If Left(targetHeaders(j), InStr(targetHeaders(j), "_") - 1) = itemKey Then
                    colTarget = GetCol(targetHeaders(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    checkVal = ws.Cells(r, colSource1).Value
                    conf = ws.Cells(r, colSourceCON).Value
                    ' Probability assigned to "item is true" is conf/100 for a
                    ' TRUE answer and 1 - conf/100 for a FALSE answer.
                    If checkVal = "TRUE" Then
                        result = ((conf / 100) - outcome) ^ 2
                    ElseIf checkVal = "FALSE" Then
                        result = ((1 - (conf / 100)) - outcome) ^ 2
                    Else
                        result = CVErr(xlErrNA) ' missing/unexpected answer
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

' Compute Brier scores for every trivia item and write them into the matching
' *_ex / *_easy / *_hard target columns on the first worksheet.
Sub brierVARS()
    ' (answer, confidence) column-name pairs for the objectively FALSE items.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    ' (answer, confidence) column-name pairs for the objectively TRUE items.
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    ' Output columns, matched to items by their leading prefix.
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' False items score against outcome 0; true items against outcome 1.
    WriteBrierScores ws, false_vars, target_headers, 0#, rowCount
    WriteBrierScores ws, true_vars, target_headers, 1#, rowCount
End Sub

View File

@ -0,0 +1,87 @@
' Locate headerName in the header row (row 1) of ws.
' Returns the 1-based column index, or 0 when the header is not present.
Function GetCol(headerName As String, ws As Worksheet) As Long
    Dim headerSpan As Long
    headerSpan = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    On Error Resume Next
    Dim hit As Variant
    hit = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, headerSpan)), 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant when there is no match.
    If Not IsError(hit) Then
        GetCol = CLng(hit)
    Else
        GetCol = 0
    End If
End Function
' Helper for brierVARS: for each (answer, confidence) column pair in pairVars,
' write the Brier score into the target column whose header shares the item's
' prefix (the text before the first underscore). outcome is the item's true
' state: 0 for objectively false items, 1 for objectively true items.
Private Sub WriteBrierScores(ByVal ws As Worksheet, ByVal pairVars As Variant, ByVal targetHeaders As Variant, ByVal outcome As Double, ByVal rowCount As Long)
    Dim i As Long, r As Long, j As Long
    Dim srcVar As String, itemKey As String, checkVal As String
    Dim colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim conf As Double
    Dim result As Variant
    For i = 0 To UBound(pairVars) Step 2
        srcVar = pairVars(i)
        ' Item key, e.g. "moza" from "moza_55_F_1".
        itemKey = Left(srcVar, InStr(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(pairVars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' PERF: the target column depends only on the item, not the row,
            ' so resolve it once per item (the original redid this per row).
            colTarget = 0
            For j = 0 To UBound(targetHeaders)
                If Left(targetHeaders(j), InStr(targetHeaders(j), "_") - 1) = itemKey Then
                    colTarget = GetCol(targetHeaders(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    checkVal = ws.Cells(r, colSource1).Value
                    conf = ws.Cells(r, colSourceCON).Value
                    ' Probability assigned to "item is true" is conf/100 for a
                    ' TRUE answer and 1 - conf/100 for a FALSE answer.
                    If checkVal = "TRUE" Then
                        result = ((conf / 100) - outcome) ^ 2
                    ElseIf checkVal = "FALSE" Then
                        result = ((1 - (conf / 100)) - outcome) ^ 2
                    Else
                        result = CVErr(xlErrNA) ' missing/unexpected answer
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

' Compute Brier scores for every trivia item and write them into the matching
' *_ex / *_easy / *_hard target columns on the first worksheet.
Sub brierVARS()
    ' (answer, confidence) column-name pairs for the objectively FALSE items.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    ' (answer, confidence) column-name pairs for the objectively TRUE items.
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    ' Output columns, matched to items by their leading prefix.
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' False items score against outcome 0; true items against outcome 1.
    WriteBrierScores ws, false_vars, target_headers, 0#, rowCount
    WriteBrierScores ws, true_vars, target_headers, 1#, rowCount
End Sub

View File

@ -0,0 +1,87 @@
' Locate headerName in the header row (row 1) of ws.
' Returns the 1-based column index, or 0 when the header is not present.
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    Dim headerSpan As Long
    headerSpan = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    On Error Resume Next
    Dim hit As Variant
    hit = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, headerSpan)), 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant when there is no match.
    If Not IsError(hit) Then
        GetCol = CLng(hit)
    Else
        GetCol = 0
    End If
End Function
' Helper for brierVARS: for each (answer, confidence) column pair in pairVars,
' write the Brier score into the target column whose header shares the item's
' prefix (the text before the first underscore). outcome is the item's true
' state: 0 for objectively false items, 1 for objectively true items.
Private Sub WriteBrierScores(ByVal ws As Worksheet, ByVal pairVars As Variant, ByVal targetHeaders As Variant, ByVal outcome As Double, ByVal rowCount As Long)
    Dim i As Long, r As Long, j As Long
    Dim srcVar As String, itemKey As String, checkVal As String
    Dim colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim conf As Double
    Dim result As Variant
    For i = 0 To UBound(pairVars) Step 2
        srcVar = pairVars(i)
        ' Item key, e.g. "moza" from "moza_55_F_1".
        itemKey = Left(srcVar, InStr(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(pairVars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' PERF: the target column depends only on the item, not the row,
            ' so resolve it once per item (the original redid this per row).
            colTarget = 0
            For j = 0 To UBound(targetHeaders)
                If Left(targetHeaders(j), InStr(targetHeaders(j), "_") - 1) = itemKey Then
                    colTarget = GetCol(targetHeaders(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    checkVal = ws.Cells(r, colSource1).Value
                    conf = ws.Cells(r, colSourceCON).Value
                    ' Probability assigned to "item is true" is conf/100 for a
                    ' TRUE answer and 1 - conf/100 for a FALSE answer.
                    If checkVal = "TRUE" Then
                        result = ((conf / 100) - outcome) ^ 2
                    ElseIf checkVal = "FALSE" Then
                        result = ((1 - (conf / 100)) - outcome) ^ 2
                    Else
                        result = CVErr(xlErrNA) ' missing/unexpected answer
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

' Compute Brier scores for every trivia item and write them into the matching
' *_ex / *_easy / *_hard target columns on the first worksheet.
Sub brierVARS()
    ' (answer, confidence) column-name pairs for the objectively FALSE items.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    ' (answer, confidence) column-name pairs for the objectively TRUE items.
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    ' Output columns, matched to items by their leading prefix.
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' False items score against outcome 0; true items against outcome 1.
    WriteBrierScores ws, false_vars, target_headers, 0#, rowCount
    WriteBrierScores ws, true_vars, target_headers, 1#, rowCount
End Sub

View File

@ -0,0 +1,87 @@
' Locate headerName in the header row (row 1) of ws.
' Returns the 1-based column index, or 0 when the header is not present.
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    Dim headerSpan As Long
    headerSpan = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    On Error Resume Next
    Dim hit As Variant
    hit = Application.Match(headerName, ws.Range(ws.Cells(1, 1), ws.Cells(1, headerSpan)), 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant when there is no match.
    If Not IsError(hit) Then
        GetCol = CLng(hit)
    Else
        GetCol = 0
    End If
End Function
' Helper for brierVARS: for each (answer, confidence) column pair in pairVars,
' write the Brier score into the target column whose header shares the item's
' prefix (the text before the first underscore). outcome is the item's true
' state: 0 for objectively false items, 1 for objectively true items.
Private Sub WriteBrierScores(ByVal ws As Worksheet, ByVal pairVars As Variant, ByVal targetHeaders As Variant, ByVal outcome As Double, ByVal rowCount As Long)
    Dim i As Long, r As Long, j As Long
    Dim srcVar As String, itemKey As String, checkVal As String
    Dim colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim conf As Double
    Dim result As Variant
    For i = 0 To UBound(pairVars) Step 2
        srcVar = pairVars(i)
        ' Item key, e.g. "moza" from "moza_55_F_1".
        itemKey = Left(srcVar, InStr(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(pairVars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' PERF: the target column depends only on the item, not the row,
            ' so resolve it once per item (the original redid this per row).
            colTarget = 0
            For j = 0 To UBound(targetHeaders)
                If Left(targetHeaders(j), InStr(targetHeaders(j), "_") - 1) = itemKey Then
                    colTarget = GetCol(targetHeaders(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    checkVal = ws.Cells(r, colSource1).Value
                    conf = ws.Cells(r, colSourceCON).Value
                    ' Probability assigned to "item is true" is conf/100 for a
                    ' TRUE answer and 1 - conf/100 for a FALSE answer.
                    If checkVal = "TRUE" Then
                        result = ((conf / 100) - outcome) ^ 2
                    ElseIf checkVal = "FALSE" Then
                        result = ((1 - (conf / 100)) - outcome) ^ 2
                    Else
                        result = CVErr(xlErrNA) ' missing/unexpected answer
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

' Compute Brier scores for every trivia item and write them into the matching
' *_ex / *_easy / *_hard target columns on the first worksheet.
Sub brierVARS()
    ' (answer, confidence) column-name pairs for the objectively FALSE items.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    ' (answer, confidence) column-name pairs for the objectively TRUE items.
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    ' Output columns, matched to items by their leading prefix.
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim rowCount As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' False items score against outcome 0; true items score against outcome 1.
    WriteBrierScores ws, false_vars, target_headers, 0#, rowCount
    WriteBrierScores ws, true_vars, target_headers, 1#, rowCount
End Sub

View File

@ -0,0 +1,87 @@
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Look up headerName in row 1 of ws and return its 1-based column index.
    ' Returns 0 when the header is not present.
    Dim headerRow As Range
    Dim matchResult As Variant
    Set headerRow = ws.Range(ws.Cells(1, 1), _
                             ws.Cells(1, ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column))
    On Error Resume Next
    matchResult = Application.Match(headerName, headerRow, 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant (not a raised error) on no match.
    If IsError(matchResult) Then
        GetCol = 0
    Else
        GetCol = CLng(matchResult)
    End If
End Function
Sub brierVARS()
    ' Writes a per-row Brier score for each forecast item into its target column.
    ' Items in false_vars have correct answer FALSE (outcome 0); items in
    ' true_vars have correct answer TRUE (outcome 1). Each item is a pair of
    ' headers: the respondent's TRUE/FALSE answer column and a 0-100 confidence
    ' column. Score = (p_true - outcome)^2, where p_true is the probability the
    ' respondent effectively assigned to "TRUE" (conf/100 when they answered
    ' TRUE, 1 - conf/100 when they answered FALSE).
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim srcVar As String, matchPrefix As String
    Dim rawTruth As Variant, rawConf As Variant, p As Double
    Dim result As Variant
    Dim r As Long, j As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' --- Items whose correct answer is FALSE (outcome = 0) ---
    For i = 0 To UBound(false_vars) Step 2
        srcVar = false_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(false_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' Target column depends only on the item prefix, so resolve it once
            ' per item rather than once per row (was rows*headers Match calls).
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    ' Read cells as Variants: assigning an error cell (#N/A)
                    ' to String/Double dummies raised Type Mismatch at runtime.
                    rawTruth = ws.Cells(r, colSource1).Value
                    rawConf = ws.Cells(r, colSourceCON).Value
                    If IsError(rawTruth) Or IsError(rawConf) Or Not IsNumeric(rawConf) Then
                        result = CVErr(xlErrNA)
                    Else
                        p = CDbl(rawConf) / 100#
                        ' Normalize case/whitespace: a real Boolean cell
                        ' stringifies as "True", which the old exact compare
                        ' against "TRUE" silently rejected.
                        Select Case Trim$(UCase$(CStr(rawTruth)))
                            Case "TRUE"
                                result = (p - 0#) ^ 2
                            Case "FALSE"
                                result = ((1# - p) - 0#) ^ 2
                            Case Else
                                result = CVErr(xlErrNA)
                        End Select
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
    ' --- Items whose correct answer is TRUE (outcome = 1) ---
    For i = 0 To UBound(true_vars) Step 2
        srcVar = true_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(true_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    rawTruth = ws.Cells(r, colSource1).Value
                    rawConf = ws.Cells(r, colSourceCON).Value
                    If IsError(rawTruth) Or IsError(rawConf) Or Not IsNumeric(rawConf) Then
                        result = CVErr(xlErrNA)
                    Else
                        p = CDbl(rawConf) / 100#
                        Select Case Trim$(UCase$(CStr(rawTruth)))
                            Case "TRUE"
                                result = (p - 1#) ^ 2
                            Case "FALSE"
                                result = ((1# - p) - 1#) ^ 2
                            Case Else
                                result = CVErr(xlErrNA)
                        End Select
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

View File

@ -0,0 +1,141 @@
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Look up headerName in row 1 of ws and return its 1-based column index.
    ' Returns 0 when the header is not present.
    Dim headerRow As Range
    Dim matchResult As Variant
    Set headerRow = ws.Range(ws.Cells(1, 1), _
                             ws.Cells(1, ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column))
    On Error Resume Next
    matchResult = Application.Match(headerName, headerRow, 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant (not a raised error) on no match.
    If IsError(matchResult) Then
        GetCol = 0
    Else
        GetCol = CLng(matchResult)
    End If
End Function
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    ' Map a cell value onto True/False; returns Null when it cannot be decided
    ' (error cells, blanks, unrecognized text).
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        ' Case- and whitespace-insensitive text comparison.
        Select Case Trim$(UCase$(CStr(v)))
            Case "TRUE", "T", "1", "YES", "Y"
                NormalizeTruth = True
            Case "FALSE", "F", "0", "NO", "N"
                NormalizeTruth = False
            Case Else
                NormalizeTruth = Null
        End Select
    End If
End Function
Private Function NormalizeProb01(ByVal v As Variant) As Double
    ' Normalize a confidence cell to the [0,1] range.
    ' Accepts "NN%" text, 0-100 numbers (treated as percentages when > 1),
    ' and 0-1 fractions. Returns -1 when the value is not interpretable.
    Dim txt As String
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        txt = Replace$(txt, "%", "")
        If IsNumeric(txt) Then
            NormalizeProb01 = CDbl(txt) / 100#
            Exit Function
        End If
    End If
    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
    ElseIf CDbl(v) > 1# Then
        ' Values above 1 are assumed to be percentages on a 0-100 scale.
        NormalizeProb01 = CDbl(v) / 100#
    Else
        NormalizeProb01 = CDbl(v)
    End If
End Function
Sub brierVARS()
    ' Writes a per-row Brier score for each forecast item into its target column.
    ' Items in false_vars have correct answer FALSE (outcome 0); items in
    ' true_vars have correct answer TRUE (outcome 1). Each item is a pair of
    ' headers: the respondent's TRUE/FALSE answer column and a confidence
    ' column. Score = (p_true - outcome)^2, where p_true is the probability the
    ' respondent effectively assigned to "TRUE" (p when they answered TRUE,
    ' 1 - p when they answered FALSE), with p normalized via NormalizeProb01.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim srcVar As String, matchPrefix As String
    Dim truth As Variant, p As Double
    Dim result As Variant
    Dim r As Long, j As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' --- Items whose correct answer is FALSE (outcome = 0) ---
    For i = 0 To UBound(false_vars) Step 2
        srcVar = false_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(false_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' Target column depends only on the item prefix, so resolve it once
            ' per item rather than once per row (was rows*headers Match calls).
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    ' Pass the raw cell Variant straight to NormalizeTruth: the
                    ' previous String intermediate raised Type Mismatch on error
                    ' cells (#N/A), making NormalizeTruth's IsError/IsEmpty
                    ' handling unreachable.
                    truth = NormalizeTruth(ws.Cells(r, colSource1).Value)
                    p = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    If IsNull(truth) Or p < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (p - 0#) ^ 2
                    Else
                        result = ((1# - p) - 0#) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
    ' --- Items whose correct answer is TRUE (outcome = 1) ---
    For i = 0 To UBound(true_vars) Step 2
        srcVar = true_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(true_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    truth = NormalizeTruth(ws.Cells(r, colSource1).Value)
                    p = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    If IsNull(truth) Or p < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (p - 1#) ^ 2
                    Else
                        result = ((1# - p) - 1#) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

View File

@ -0,0 +1,141 @@
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Look up headerName in row 1 of ws and return its 1-based column index.
    ' Returns 0 when the header is not present.
    Dim headerRow As Range
    Dim matchResult As Variant
    Set headerRow = ws.Range(ws.Cells(1, 1), _
                             ws.Cells(1, ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column))
    On Error Resume Next
    matchResult = Application.Match(headerName, headerRow, 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant (not a raised error) on no match.
    If IsError(matchResult) Then
        GetCol = 0
    Else
        GetCol = CLng(matchResult)
    End If
End Function
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    ' Map a cell value onto True/False; returns Null when it cannot be decided
    ' (error cells, blanks, unrecognized text).
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        ' Case- and whitespace-insensitive text comparison.
        Select Case Trim$(UCase$(CStr(v)))
            Case "TRUE", "T", "1", "YES", "Y"
                NormalizeTruth = True
            Case "FALSE", "F", "0", "NO", "N"
                NormalizeTruth = False
            Case Else
                NormalizeTruth = Null
        End Select
    End If
End Function
Private Function NormalizeProb01(ByVal v As Variant) As Double
    ' Normalize a confidence cell to the [0,1] range.
    ' Accepts "NN%" text, 0-100 numbers (treated as percentages when > 1),
    ' and 0-1 fractions. Returns -1 when the value is not interpretable.
    Dim txt As String
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        txt = Replace$(txt, "%", "")
        If IsNumeric(txt) Then
            NormalizeProb01 = CDbl(txt) / 100#
            Exit Function
        End If
    End If
    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
    ElseIf CDbl(v) > 1# Then
        ' Values above 1 are assumed to be percentages on a 0-100 scale.
        NormalizeProb01 = CDbl(v) / 100#
    Else
        NormalizeProb01 = CDbl(v)
    End If
End Function
Sub brierVARS()
    ' Writes a per-row Brier score for each forecast item into its target column.
    ' Items in false_vars have correct answer FALSE (outcome 0); items in
    ' true_vars have correct answer TRUE (outcome 1). Each item is a pair of
    ' headers: the respondent's TRUE/FALSE answer column and a confidence
    ' column. Score = (p_true - outcome)^2, where p_true is the probability the
    ' respondent effectively assigned to "TRUE" (p when they answered TRUE,
    ' 1 - p when they answered FALSE), with p normalized via NormalizeProb01.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim srcVar As String, matchPrefix As String
    Dim truth As Variant, p As Double
    Dim result As Variant
    Dim r As Long, j As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' --- Items whose correct answer is FALSE (outcome = 0) ---
    For i = 0 To UBound(false_vars) Step 2
        srcVar = false_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(false_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' Target column depends only on the item prefix, so resolve it once
            ' per item rather than once per row (was rows*headers Match calls).
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    ' Pass the raw cell Variant straight to NormalizeTruth: the
                    ' previous String intermediate raised Type Mismatch on error
                    ' cells (#N/A), making NormalizeTruth's IsError/IsEmpty
                    ' handling unreachable.
                    truth = NormalizeTruth(ws.Cells(r, colSource1).Value)
                    p = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    If IsNull(truth) Or p < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (p - 0#) ^ 2
                    Else
                        result = ((1# - p) - 0#) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
    ' --- Items whose correct answer is TRUE (outcome = 1) ---
    For i = 0 To UBound(true_vars) Step 2
        srcVar = true_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(true_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    truth = NormalizeTruth(ws.Cells(r, colSource1).Value)
                    p = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    If IsNull(truth) Or p < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (p - 1#) ^ 2
                    Else
                        result = ((1# - p) - 1#) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

View File

@ -0,0 +1,141 @@
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Look up headerName in row 1 of ws and return its 1-based column index.
    ' Returns 0 when the header is not present.
    Dim headerRow As Range
    Dim matchResult As Variant
    Set headerRow = ws.Range(ws.Cells(1, 1), _
                             ws.Cells(1, ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column))
    On Error Resume Next
    matchResult = Application.Match(headerName, headerRow, 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant (not a raised error) on no match.
    If IsError(matchResult) Then
        GetCol = 0
    Else
        GetCol = CLng(matchResult)
    End If
End Function
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    ' Map a cell value onto True/False; returns Null when it cannot be decided
    ' (error cells, blanks, unrecognized text).
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        ' Case- and whitespace-insensitive text comparison.
        Select Case Trim$(UCase$(CStr(v)))
            Case "TRUE", "T", "1", "YES", "Y"
                NormalizeTruth = True
            Case "FALSE", "F", "0", "NO", "N"
                NormalizeTruth = False
            Case Else
                NormalizeTruth = Null
        End Select
    End If
End Function
Private Function NormalizeProb01(ByVal v As Variant) As Double
    ' Normalize a confidence cell to the [0,1] range.
    ' Accepts "NN%" text, 0-100 numbers (treated as percentages when > 1),
    ' and 0-1 fractions. Returns -1 when the value is not interpretable.
    Dim txt As String
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        txt = Replace$(txt, "%", "")
        If IsNumeric(txt) Then
            NormalizeProb01 = CDbl(txt) / 100#
            Exit Function
        End If
    End If
    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
    ElseIf CDbl(v) > 1# Then
        ' Values above 1 are assumed to be percentages on a 0-100 scale.
        NormalizeProb01 = CDbl(v) / 100#
    Else
        NormalizeProb01 = CDbl(v)
    End If
End Function
Sub brierVARS()
    ' Writes a per-row Brier score for each forecast item into its target column.
    ' Items in false_vars have correct answer FALSE (outcome 0); items in
    ' true_vars have correct answer TRUE (outcome 1). Each item is a pair of
    ' headers: the respondent's TRUE/FALSE answer column and a confidence
    ' column. Score = (p_true - outcome)^2, where p_true is the probability the
    ' respondent effectively assigned to "TRUE" (p when they answered TRUE,
    ' 1 - p when they answered FALSE), with p normalized via NormalizeProb01.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim srcVar As String, matchPrefix As String
    Dim truth As Variant, p As Double
    Dim result As Variant
    Dim r As Long, j As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' --- Items whose correct answer is FALSE (outcome = 0) ---
    For i = 0 To UBound(false_vars) Step 2
        srcVar = false_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(false_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' Target column depends only on the item prefix, so resolve it once
            ' per item rather than once per row (was rows*headers Match calls).
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    ' Pass the raw cell Variant straight to NormalizeTruth: the
                    ' previous String intermediate raised Type Mismatch on error
                    ' cells (#N/A), making NormalizeTruth's IsError/IsEmpty
                    ' handling unreachable.
                    truth = NormalizeTruth(ws.Cells(r, colSource1).Value)
                    p = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    If IsNull(truth) Or p < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (p - 0#) ^ 2
                    Else
                        result = ((1# - p) - 0#) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
    ' --- Items whose correct answer is TRUE (outcome = 1) ---
    For i = 0 To UBound(true_vars) Step 2
        srcVar = true_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(true_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    truth = NormalizeTruth(ws.Cells(r, colSource1).Value)
                    p = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    If IsNull(truth) Or p < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (p - 1#) ^ 2
                    Else
                        result = ((1# - p) - 1#) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

View File

@ -0,0 +1,141 @@
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Look up headerName in row 1 of ws and return its 1-based column index.
    ' Returns 0 when the header is not present.
    Dim headerRow As Range
    Dim matchResult As Variant
    Set headerRow = ws.Range(ws.Cells(1, 1), _
                             ws.Cells(1, ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column))
    On Error Resume Next
    matchResult = Application.Match(headerName, headerRow, 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant (not a raised error) on no match.
    If IsError(matchResult) Then
        GetCol = 0
    Else
        GetCol = CLng(matchResult)
    End If
End Function
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    ' Map a cell value onto True/False; returns Null when it cannot be decided
    ' (error cells, blanks, unrecognized text).
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        ' Case- and whitespace-insensitive text comparison.
        Select Case Trim$(UCase$(CStr(v)))
            Case "TRUE", "T", "1", "YES", "Y"
                NormalizeTruth = True
            Case "FALSE", "F", "0", "NO", "N"
                NormalizeTruth = False
            Case Else
                NormalizeTruth = Null
        End Select
    End If
End Function
Private Function NormalizeProb01(ByVal v As Variant) As Double
    ' Normalize a confidence cell to the [0,1] range.
    ' Accepts "NN%" text, 0-100 numbers (treated as percentages when > 1),
    ' and 0-1 fractions. Returns -1 when the value is not interpretable.
    Dim txt As String
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        txt = Replace$(txt, "%", "")
        If IsNumeric(txt) Then
            NormalizeProb01 = CDbl(txt) / 100#
            Exit Function
        End If
    End If
    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
    ElseIf CDbl(v) > 1# Then
        ' Values above 1 are assumed to be percentages on a 0-100 scale.
        NormalizeProb01 = CDbl(v) / 100#
    Else
        NormalizeProb01 = CDbl(v)
    End If
End Function
Sub brierVARS()
    ' Writes a per-row Brier score for each forecast item into its target column.
    ' Items in false_vars have correct answer FALSE (outcome 0); items in
    ' true_vars have correct answer TRUE (outcome 1). Each item is a pair of
    ' headers: the respondent's TRUE/FALSE answer column and a confidence
    ' column. Score = (p_true - outcome)^2, where p_true is the probability the
    ' respondent effectively assigned to "TRUE" (p when they answered TRUE,
    ' 1 - p when they answered FALSE), with p normalized via NormalizeProb01.
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
    Dim srcVar As String, matchPrefix As String
    Dim truth As Variant, p As Double
    Dim result As Variant
    Dim r As Long, j As Long
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' --- Items whose correct answer is FALSE (outcome = 0) ---
    For i = 0 To UBound(false_vars) Step 2
        srcVar = false_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(false_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            ' Target column depends only on the item prefix, so resolve it once
            ' per item rather than once per row (was rows*headers Match calls).
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    ' Pass the raw cell Variant straight to NormalizeTruth: the
                    ' previous String intermediate raised Type Mismatch on error
                    ' cells (#N/A), making NormalizeTruth's IsError/IsEmpty
                    ' handling unreachable.
                    truth = NormalizeTruth(ws.Cells(r, colSource1).Value)
                    p = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    If IsNull(truth) Or p < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (p - 0#) ^ 2
                    Else
                        result = ((1# - p) - 0#) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
    ' --- Items whose correct answer is TRUE (outcome = 1) ---
    For i = 0 To UBound(true_vars) Step 2
        srcVar = true_vars(i)
        matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
        colSource1 = GetCol(srcVar, ws)
        colSourceCON = GetCol(true_vars(i + 1), ws)
        If colSource1 > 0 And colSourceCON > 0 Then
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
                    colTarget = GetCol(target_headers(j), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    truth = NormalizeTruth(ws.Cells(r, colSource1).Value)
                    p = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
                    If IsNull(truth) Or p < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        result = (p - 1#) ^ 2
                    Else
                        result = ((1# - p) - 1#) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

View File

@ -0,0 +1,141 @@
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    ' Look up headerName in row 1 of ws and return its 1-based column index.
    ' Returns 0 when the header is not present.
    Dim headerRow As Range
    Dim matchResult As Variant
    Set headerRow = ws.Range(ws.Cells(1, 1), _
                             ws.Cells(1, ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column))
    On Error Resume Next
    matchResult = Application.Match(headerName, headerRow, 0)
    On Error GoTo 0
    ' Application.Match yields an error Variant (not a raised error) on no match.
    If IsError(matchResult) Then
        GetCol = 0
    Else
        GetCol = CLng(matchResult)
    End If
End Function
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    ' Map a cell value onto True/False; returns Null when it cannot be decided
    ' (error cells, blanks, unrecognized text).
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        ' Case- and whitespace-insensitive text comparison.
        Select Case Trim$(UCase$(CStr(v)))
            Case "TRUE", "T", "1", "YES", "Y"
                NormalizeTruth = True
            Case "FALSE", "F", "0", "NO", "N"
                NormalizeTruth = False
            Case Else
                NormalizeTruth = Null
        End Select
    End If
End Function
Private Function NormalizeProb01(ByVal v As Variant) As Double
    ' Normalize a confidence cell to the [0,1] range.
    ' Accepts "NN%" text, 0-100 numbers (treated as percentages when > 1),
    ' and 0-1 fractions. Returns -1 when the value is not interpretable.
    Dim txt As String
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        txt = Replace$(txt, "%", "")
        If IsNumeric(txt) Then
            NormalizeProb01 = CDbl(txt) / 100#
            Exit Function
        End If
    End If
    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
    ElseIf CDbl(v) > 1# Then
        ' Values above 1 are assumed to be percentages on a 0-100 scale.
        NormalizeProb01 = CDbl(v) / 100#
    Else
        NormalizeProb01 = CDbl(v)
    End If
End Function
Sub brierVARS()
Dim false_vars As Variant
Dim true_vars As Variant
false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
Dim target_headers As Variant
target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
Dim ws As Worksheet
Set ws = ThisWorkbook.Sheets(1)
Dim i As Long, rowCount As Long, colSource1 As Long, colSourceCON As Long, colTarget As Long
Dim srcVar As String, matchPrefix As String, checkVal As String, val As Double
Dim result As Variant
Dim r As Long, j As Long
rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
For i = 0 To UBound(false_vars) Step 2
srcVar = false_vars(i)
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
colSource1 = GetCol(srcVar, ws)
colSourceCON = GetCol(false_vars(i + 1), ws)
If colSource1 > 0 And colSourceCON > 0 Then
For r = 2 To rowCount
checkVal = ws.Cells(r, colSource1).Value
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
colTarget = 0
For j = 0 To UBound(target_headers)
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
colTarget = GetCol(target_headers(j), ws)
Exit For
End If
Next j
If colTarget > 0 Then
Dim truth As Variant
truth = NormalizeTruth(checkVal)
If IsNull(truth) Or val < 0 Then
result = CVErr(xlErrNA)
ElseIf truth = True Then
result = (val - 0#) ^ 2
Else
result = ((1# - val) - 0#) ^ 2
End If
ws.Cells(r, colTarget).Value = result
End If
Next r
End If
Next i
For i = 0 To UBound(true_vars) Step 2
srcVar = true_vars(i)
matchPrefix = Left(srcVar, InStrRev(srcVar, "_") - 1)
colSource1 = GetCol(srcVar, ws)
colSourceCON = GetCol(true_vars(i + 1), ws)
If colSource1 > 0 And colSourceCON > 0 Then
For r = 2 To rowCount
checkVal = ws.Cells(r, colSource1).Value
val = NormalizeProb01(ws.Cells(r, colSourceCON).Value)
colTarget = 0
For j = 0 To UBound(target_headers)
If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = Left(matchPrefix, InStr(matchPrefix, "_") - 1) Then
colTarget = GetCol(target_headers(j), ws)
Exit For
End If
Next j
If colTarget > 0 Then
Dim truth2 As Variant
truth2 = NormalizeTruth(checkVal)
If IsNull(truth2) Or val < 0 Then
result = CVErr(xlErrNA)
ElseIf truth2 = True Then
result = (val - 1#) ^ 2
Else
result = ((1# - val) - 1#) ^ 2
End If
ws.Cells(r, colTarget).Value = result
End If
Next r
End If
Next i
End Sub

View File

@ -0,0 +1,141 @@
' Returns the 1-based index of the column whose header-row cell (row 1)
' equals headerName, or 0 when no such header exists on the sheet.
Function GetCol(ByVal headerName As String, ByVal ws As Worksheet) As Long
    Dim lastCol As Long
    Dim headerRow As Range
    Dim hit As Variant

    lastCol = ws.Cells(1, ws.Columns.Count).End(xlToLeft).Column
    Set headerRow = ws.Range(ws.Cells(1, 1), ws.Cells(1, lastCol))

    ' Application.Match returns an Error variant (rather than raising)
    ' when there is no match; the handler is belt-and-braces.
    On Error Resume Next
    hit = Application.Match(headerName, headerRow, 0)
    On Error GoTo 0

    If IsError(hit) Then
        GetCol = 0
    Else
        GetCol = CLng(hit)
    End If
End Function
' Maps a cell value onto True/False. Booleans pass through; common text
' tokens ("true"/"t"/"1"/"yes"/"y" and their negatives) are recognized
' case-insensitively. Returns Null for errors, empties, or anything else.
Private Function NormalizeTruth(ByVal v As Variant) As Variant
    If VarType(v) = vbBoolean Then
        NormalizeTruth = v
    ElseIf IsError(v) Or IsEmpty(v) Then
        NormalizeTruth = Null
    Else
        Dim token As String
        token = UCase$(Trim$(CStr(v)))
        If token = "TRUE" Or token = "T" Or token = "1" Or token = "YES" Or token = "Y" Then
            NormalizeTruth = True
        ElseIf token = "FALSE" Or token = "F" Or token = "0" Or token = "NO" Or token = "N" Then
            NormalizeTruth = False
        Else
            NormalizeTruth = Null
        End If
    End If
End Function
' Rescales a confidence entry to the [0,1] range:
'   "85%"  -> 0.85 (percent sign stripped, divided by 100)
'   85     -> 0.85 (bare numbers above 1 are treated as percentages)
'   0.85   -> 0.85 (already a proportion, passed through)
' Returns -1 as a sentinel for blank, error, or non-numeric input.
Private Function NormalizeProb01(ByVal v As Variant) As Double
    If IsError(v) Or IsEmpty(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If

    ' Percent-formatted text: strip the sign and divide by 100.
    Dim txt As String
    txt = CStr(v)
    If InStr(txt, "%") > 0 Then
        txt = Replace$(txt, "%", "")
        If IsNumeric(txt) Then
            NormalizeProb01 = CDbl(txt) / 100#
            Exit Function
        End If
    End If

    If Not IsNumeric(v) Then
        NormalizeProb01 = -1
        Exit Function
    End If

    Dim num As Double
    num = CDbl(v)
    If num > 1# Then num = num / 100#
    NormalizeProb01 = num
End Function
' Computes per-row Brier scores for every quiz item on Sheet 1 and writes
' them into the matching target column. Items come in (response column,
' confidence column) pairs; false_vars holds items whose correct answer is
' False (scored against outcome 0), true_vars items whose correct answer is
' True (scored against outcome 1).
Sub brierVARS()
    Dim false_vars As Variant
    Dim true_vars As Variant
    false_vars = Array("moza_55_F_1", "moza_55_CON", "demo_15_F_1", "demo_15_CON", "hume_35_F_1", "hume_35_CON", "gulf_15_F_1", "gulf_15_CON", "memo_75_F_1", "memo_75_CON", "vitc_55_F_1", "vitc_55_CON", "hert_35_F_1", "hert_35_CON", "gees_55_F_1", "gees_55_CON", "gang_15_F_1", "gang_15_CON", "list_75_F_1", "list_75_CON", "mont_35_F_1", "mont_35_CON", "dwar_55_F_1", "dwar_55_CON", "pucc_15_F_1", "pucc_15_CON", "spee_75_F_1", "spee_75_CON", "lute_35_F_1", "lute_35_CON", "croc_75_F_1", "croc_75_CON")
    true_vars = Array("vaud_15_T_1", "vaud_15_CON", "oedi_35_T_1", "oedi_35_CON", "mons_55_T_1", "mons_55_CON", "gest_75_T_1", "gest_75_CON", "kabu_15_T_1", "kabu_15_CON", "sham_55_T_1", "sham_55_CON", "pana_35_T_1", "pana_35_CON", "bohr_15_T_1", "bohr_15_CON", "chur_75_T_1", "chur_75_CON", "carb_35_T_1", "carb_35_CON", "cons_55_T_1", "cons_55_CON", "papy_75_T_1", "papy_75_CON", "dors_55_T_1", "dors_55_CON", "tsun_75_T_1", "tsun_75_CON", "troy_15_T_1", "troy_15_CON", "lock_35_T_1", "lock_35_CON")
    Dim target_headers As Variant
    ' Output columns, matched to items by the prefix before the first "_".
    target_headers = Array("gest_T_ex", "dors_T_ex", "chur_T_ex", "mons_T_ex", "lock_T_easy", "hume_F_easy", "papy_T_easy", "sham_T_easy", "list_F_easy", "cons_T_easy", "tsun_T_easy", "pana_T_easy", "kabu_T_easy", "gulf_F_easy", "oedi_T_easy", "vaud_T_easy", "mont_F_easy", "demo_F_easy", "spee_F_hard", "dwar_F_hard", "carb_T_hard", "bohr_T_hard", "gang_F_hard", "vitc_F_hard", "hert_F_hard", "pucc_F_hard", "troy_T_hard", "moza_F_hard", "croc_F_hard", "gees_F_hard", "lute_F_hard", "memo_F_hard")
    Dim ws As Worksheet
    Set ws = ThisWorkbook.Sheets(1)
    Dim rowCount As Long
    ' Last used row, judged by column A.
    rowCount = ws.Cells(ws.Rows.Count, "A").End(xlUp).Row
    ' Same scoring logic for both passes; only the true outcome differs.
    WriteBrierScores false_vars, 0#, target_headers, ws, rowCount
    WriteBrierScores true_vars, 1#, target_headers, ws, rowCount
End Sub

' For each (response header, confidence header) pair in varPairs, writes a
' per-row Brier score against the given outcome (0 = statement is false,
' 1 = statement is true) into the target column sharing the item's prefix.
' Rows with an unrecognizable response or invalid confidence get #N/A.
Private Sub WriteBrierScores(ByVal varPairs As Variant, ByVal outcome As Double, _
                             ByVal target_headers As Variant, ByVal ws As Worksheet, _
                             ByVal rowCount As Long)
    Dim i As Long, r As Long, j As Long
    Dim colResp As Long, colConf As Long, colTarget As Long
    Dim itemPrefix As String
    Dim truth As Variant, conf As Double, result As Variant

    For i = 0 To UBound(varPairs) Step 2
        ' e.g. "moza_55_F_1" -> "moza"
        itemPrefix = Left(varPairs(i), InStr(varPairs(i), "_") - 1)
        colResp = GetCol(CStr(varPairs(i)), ws)
        colConf = GetCol(CStr(varPairs(i + 1)), ws)
        If colResp > 0 And colConf > 0 Then
            ' The target column is the same for every row; resolve it once per
            ' item (the original re-scanned target_headers on every row).
            colTarget = 0
            For j = 0 To UBound(target_headers)
                If Left(target_headers(j), InStr(target_headers(j), "_") - 1) = itemPrefix Then
                    colTarget = GetCol(CStr(target_headers(j)), ws)
                    Exit For
                End If
            Next j
            If colTarget > 0 Then
                For r = 2 To rowCount
                    ' Read the raw Variant so NormalizeTruth can map worksheet
                    ' error values to Null (assigning to a String first, as the
                    ' original did, raises a type mismatch on error cells).
                    truth = NormalizeTruth(ws.Cells(r, colResp).Value)
                    conf = NormalizeProb01(ws.Cells(r, colConf).Value)
                    If IsNull(truth) Or conf < 0 Then
                        result = CVErr(xlErrNA)
                    ElseIf truth = True Then
                        ' Respondent said True with probability conf.
                        result = (conf - outcome) ^ 2
                    Else
                        ' Respondent said False: probability assigned to True is 1 - conf.
                        result = ((1# - conf) - outcome) ^ 2
                    End If
                    ws.Cells(r, colTarget).Value = result
                Next r
            End If
        End If
    Next i
End Sub

View File

@ -0,0 +1,40 @@
# EOHI wave 1: load the survey export, filter respondents, and print
# basic descriptives (missingness, sex dummy, age, frequency tables).
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the analysis variables; drop "Prefer not to say" respondents.
data <- df %>%
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
filter(demo_sex != "Prefer not to say")
str(data)
# Wrapped in print() so the missingness/class checks also show when the
# script is source()d (bare top-level calls are only auto-printed
# interactively); matches the later revisions of this script.
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

View File

@ -0,0 +1,50 @@
# EOHI wave 1: exploratory descriptives (missingness, sex dummy, age,
# education recode). Prints every check so output appears under source().
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3,
# with tables verifying the mapping (NA levels shown if any).
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

View File

@ -0,0 +1,50 @@
# EOHI wave 1: exploratory descriptives (missingness, sex dummy, age,
# education recode). Prints every check so output appears under source().
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3,
# with tables verifying the mapping (NA levels shown if any).
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

View File

@ -0,0 +1,50 @@
# EOHI wave 1: exploratory descriptives (missingness, sex dummy, age,
# education recode). Prints every check so output appears under source().
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3,
# with tables verifying the mapping (NA levels shown if any).
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

View File

@ -0,0 +1,50 @@
# EOHI wave 1: exploratory descriptives (missingness, sex dummy, age,
# education recode). Prints every check so output appears under source().
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3,
# with tables verifying the mapping (NA levels shown if any).
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

View File

@ -0,0 +1,67 @@
# EOHI wave 1: descriptives plus a Pearson correlation matrix
# (complete cases), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### Correlation matrix ####
# Numeric-only subset (centered age, numeric education, sex dummy).
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, age_centered, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs")
print("Correlation Matrix:")
print(round(cor_matrix, 3))
# Persist the (unrounded) matrix for later use.
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,67 @@
# EOHI wave 1: descriptives plus a Pearson correlation matrix
# (complete cases), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### Correlation matrix ####
# Numeric-only subset (centered age, numeric education, sex dummy).
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, age_centered, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs")
print("Correlation Matrix:")
print(round(cor_matrix, 3))
# Persist the (unrounded) matrix for later use.
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,67 @@
# EOHI wave 1: descriptives plus a Pearson correlation matrix
# (complete cases), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### Correlation matrix ####
# Numeric-only subset (centered age, numeric education, sex dummy).
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, age_centered, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs")
print("Correlation Matrix:")
print(round(cor_matrix, 3))
# Persist the (unrounded) matrix for later use.
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,67 @@
# EOHI wave 1: descriptives plus a Spearman correlation matrix
# (complete cases, raw age), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### Correlation matrix ####
# Numeric-only subset; uncentered age here (Spearman is rank-based).
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print("Correlation Matrix:")
print(round(cor_matrix, 3))
# Persist the (unrounded) matrix for later use.
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,67 @@
# EOHI wave 1: descriptives plus a Spearman correlation matrix
# (complete cases, raw age), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### Correlation matrix ####
# Numeric-only subset; uncentered age here (Spearman is rank-based).
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print("Correlation Matrix:")
print(round(cor_matrix, 3))
# Persist the (unrounded) matrix for later use.
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,67 @@
# EOHI wave 1: descriptives plus a Spearman correlation matrix
# (complete cases, raw age), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")

raw <- read.csv("ehi1.csv")

# Keep the analysis variables and drop "Prefer not to say" respondents.
data <- raw %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))

# Sex dummy (0 = Male, 1 = Female) plus a cross-tab to verify the coding.
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
print(table(data$demo_sex, data$sex_dummy))

# Age descriptives, then mean-center age and check the result.
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
print(summary(data$age_centered))

# Sex frequencies: raw counts, proportions, and the dummy.
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
print(table(data$sex_dummy))

# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3.
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))

#### Correlation matrix ####
# Numeric-only subset; uncentered age here (Spearman is rank-based).
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print("Correlation Matrix:")
print(round(cor_matrix, 3))
# Persist the (unrounded) matrix for later use.
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,76 @@
# EOHI wave 1: descriptives plus a Spearman correlation matrix with
# significance tests (Hmisc::rcorr), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the analysis variables; drop "Prefer not to say" respondents.
data <- df %>%
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
filter(demo_sex != "Prefer not to say")
# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))
#### correlation matrix ####
# Uncentered age is used here; Spearman is rank-based, so centering would
# not change the coefficients anyway.
numeric_vars <- data %>%
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))
# Significance tests for the correlations (pairwise complete cases)
library(Hmisc)
cor_test <- rcorr(as.matrix(numeric_vars), type = "spearman")
print("Correlation Matrix with Significance:")
print(cor_test$r)
# cat() instead of print(): print("\nP-values:") shows the "\n" escape
# literally rather than emitting a blank line.
cat("\nP-values:\n")
print(cor_test$P)
# Save correlation matrix to CSV
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,76 @@
# EOHI wave 1: descriptives plus a Spearman correlation matrix with
# significance tests (Hmisc::rcorr), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the analysis variables; drop "Prefer not to say" respondents.
data <- df %>%
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
filter(demo_sex != "Prefer not to say")
# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))
#### correlation matrix ####
# Uncentered age is used here; Spearman is rank-based, so centering would
# not change the coefficients anyway.
numeric_vars <- data %>%
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))
# Significance tests for the correlations (pairwise complete cases)
library(Hmisc)
cor_test <- rcorr(as.matrix(numeric_vars), type = "spearman")
print("Correlation Matrix with Significance:")
print(cor_test$r)
# cat() instead of print(): print("\nP-values:") shows the "\n" escape
# literally rather than emitting a blank line.
cat("\nP-values:\n")
print(cor_test$P)
# Save correlation matrix to CSV
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,76 @@
# EOHI wave 1: descriptives plus a Spearman correlation matrix with
# significance tests (Hmisc::rcorr), saved to correlation_matrix.csv.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the analysis variables; drop "Prefer not to say" respondents.
data <- df %>%
select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
filter(demo_sex != "Prefer not to say")
# Missingness counts and variable classes.
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
# Education as an ordered factor (HS_TS < C_Ug < grad_prof) mapped to 1/2/3
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
print(table(data$edu_num, useNA = "ifany"))
print(table(data$edu3, data$edu_num, useNA = "ifany"))
#### correlation matrix ####
# Uncentered age is used here; Spearman is rank-based, so centering would
# not change the coefficients anyway.
numeric_vars <- data %>%
select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))
# Significance tests for the correlations (pairwise complete cases)
library(Hmisc)
cor_test <- rcorr(as.matrix(numeric_vars), type = "spearman")
print("Correlation Matrix with Significance:")
print(cor_test$r)
# cat() instead of print(): print("\nP-values:") shows the "\n" escape
# literally rather than emitting a blank line.
cat("\nP-values:\n")
print(cor_test$P)
# Save correlation matrix to CSV
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,97 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# BUG FIX: print() shows "\n" as a literal backslash-n; cat() interprets it
cat("Correlation Matrix:\n")
print(round(cor_test$r, 3))
cat("\nP-values:\n")
print(round(cor_test$p, 3))

cat("\nSignificant correlations (p < 0.05):\n")
# BUG FIX: the original scanned the full symmetric matrix, so every pair was
# reported twice (i vs j AND j vs i). Restricting to the upper triangle lists
# each pair exactly once and also excludes the diagonal.
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
} else {
  print("No significant correlations found.")
}

# Save correlation matrix to CSV
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,97 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# BUG FIX: print() shows "\n" as a literal backslash-n; cat() interprets it
cat("Correlation Matrix:\n")
print(round(cor_test$r, 3))
cat("\nP-values:\n")
print(round(cor_test$p, 3))

cat("\nSignificant correlations (p < 0.05):\n")
# BUG FIX: the original scanned the full symmetric matrix, so every pair was
# reported twice (i vs j AND j vs i). Restricting to the upper triangle lists
# each pair exactly once and also excludes the diagonal.
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
} else {
  print("No significant correlations found.")
}

# Save correlation matrix to CSV
write.csv(cor_matrix, "correlation_matrix.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")

View File

@ -0,0 +1,99 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# BUG FIX: print() shows "\n" as a literal backslash-n; cat() interprets it
cat("Correlation Matrix:\n")
print(round(cor_test$r, 3))
cat("\nP-values:\n")
print(round(cor_test$p, 3))

cat("\nSignificant correlations (p < 0.05):\n")
# BUG FIX: the original scanned the full symmetric matrix, so every pair was
# reported twice (i vs j AND j vs i). Restricting to the upper triangle lists
# each pair exactly once and also excludes the diagonal.
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
} else {
  print("No significant correlations found.")
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,99 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# BUG FIX: print() shows "\n" as a literal backslash-n; cat() interprets it
cat("Correlation Matrix:\n")
print(round(cor_test$r, 3))
cat("\nP-values:\n")
print(round(cor_test$p, 3))

cat("\nSignificant correlations (p < 0.05):\n")
# BUG FIX: the original scanned the full symmetric matrix, so every pair was
# reported twice (i vs j AND j vs i). Restricting to the upper triangle lists
# each pair exactly once and also excludes the diagonal.
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
} else {
  print("No significant correlations found.")
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,99 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# BUG FIX: print() shows "\n" as a literal backslash-n; cat() interprets it
cat("Correlation Matrix:\n")
print(round(cor_test$r, 3))
cat("\nP-values:\n")
print(round(cor_test$p, 3))

cat("\nSignificant correlations (p < 0.05):\n")
# BUG FIX: the original scanned the full symmetric matrix, so every pair was
# reported twice (i vs j AND j vs i). Restricting to the upper triangle lists
# each pair exactly once and also excludes the diagonal.
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
} else {
  print("No significant correlations found.")
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,111 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# BUG FIX: print() shows "\n" as a literal backslash-n; cat() interprets it
cat("Correlation Matrix (Spearman r values):\n")
print(round(cor_test$r, 3))
cat("\nP-values Matrix:\n")
print(round(cor_test$p, 3))

# Print all correlations with r and p values (for reporting)
cat("\nAll correlations with r and p values:\n")
# BUG FIX: iterate only the upper triangle so each pair is listed once;
# the original nested loop printed every pair twice (i-vs-j and j-vs-i).
for (i in 1:(nrow(cor_test$r) - 1)) {
  for (j in (i + 1):ncol(cor_test$r)) {
    cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}

# Also print significant correlations summary
cat("\nSignificant correlations (p < 0.05):\n")
# Upper triangle only: avoids duplicate pair reports and the diagonal
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
} else {
  print("No significant correlations found.")
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,111 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# BUG FIX: print() shows "\n" as a literal backslash-n; cat() interprets it
cat("Correlation Matrix (Spearman r values):\n")
print(round(cor_test$r, 3))
cat("\nP-values Matrix:\n")
print(round(cor_test$p, 3))

# Print all correlations with r and p values (for reporting)
cat("\nAll correlations with r and p values:\n")
# BUG FIX: iterate only the upper triangle so each pair is listed once;
# the original nested loop printed every pair twice (i-vs-j and j-vs-i).
for (i in 1:(nrow(cor_test$r) - 1)) {
  for (j in (i + 1):ncol(cor_test$r)) {
    cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}

# Also print significant correlations summary
cat("\nSignificant correlations (p < 0.05):\n")
# Upper triangle only: avoids duplicate pair reports and the diagonal
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
} else {
  print("No significant correlations found.")
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,111 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# BUG FIX: print() shows "\n" as a literal backslash-n; cat() interprets it
cat("Correlation Matrix (Spearman r values):\n")
print(round(cor_test$r, 3))
cat("\nP-values Matrix:\n")
print(round(cor_test$p, 3))

# Print all correlations with r and p values (for reporting)
cat("\nAll correlations with r and p values:\n")
# BUG FIX: iterate only the upper triangle so each pair is listed once;
# the original nested loop printed every pair twice (i-vs-j and j-vs-i).
for (i in 1:(nrow(cor_test$r) - 1)) {
  for (j in (i + 1):ncol(cor_test$r)) {
    cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}

# Also print significant correlations summary
cat("\nSignificant correlations (p < 0.05):\n")
# Upper triangle only: avoids duplicate pair reports and the diagonal
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
} else {
  print("No significant correlations found.")
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,105 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# Print correlation matrix
print(round(cor_test$r, 3))
# Print p-values
print(round(cor_test$p, 3))

# Print all correlations with r and p values (for reporting)
# BUG FIX: iterate only the upper triangle so each pair is listed once;
# the original nested loop printed every pair twice (i-vs-j and j-vs-i).
for (i in 1:(nrow(cor_test$r) - 1)) {
  for (j in (i + 1):ncol(cor_test$r)) {
    cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}

# Also print significant correlations summary
# Upper triangle only: avoids duplicate pair reports and the diagonal
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,105 @@
# Suppress scientific notation in console output
options(scipen = 999)
library(dplyr)

# Working directory and raw data (machine-specific path)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")

# Keep only the analysis variables; drop respondents who declined to report sex
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")

# Data checks: missingness per column and column classes
print(colSums(is.na(data)))
print(sapply(data, class))

# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))

#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))

# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))

####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
print(round(cor_matrix, 3))

# Get significance tests for correlations using psych package
library(psych)
# Correlation matrix with raw (unadjusted) significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# Print correlation matrix
print(round(cor_test$r, 3))
# Print p-values
print(round(cor_test$p, 3))

# Print all correlations with r and p values (for reporting)
# BUG FIX: iterate only the upper triangle so each pair is listed once;
# the original nested loop printed every pair twice (i-vs-j and j-vs-i).
for (i in 1:(nrow(cor_test$r) - 1)) {
  for (j in (i + 1):ncol(cor_test$r)) {
    cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}

# Also print significant correlations summary
# Upper triangle only: avoids duplicate pair reports and the diagonal
sig_cors <- which(cor_test$p < 0.05 & upper.tri(cor_test$p), arr.ind = TRUE)
if (nrow(sig_cors) > 0) {
  for (i in seq_len(nrow(sig_cors))) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
}

# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,105 @@
# Correlation analysis for EOHI study 1: descriptives, dummy/ordinal coding,
# and a Spearman correlation matrix (with psych::corr.test p-values) over the
# key numeric variables; results are exported to CSV.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the analysis variables and drop respondents who declined to report sex.
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))
####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
# Print correlation matrix
print(round(cor_matrix, 3))
# Get significance tests for correlations using psych package
library(psych)
# Create correlation matrix with significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# Print correlation matrix
print(round(cor_test$r, 3))
# Print p-values
print(round(cor_test$p, 3))
# Print all correlations with r and p values (for reporting).
# Fixed: iterate the upper triangle only — the matrix is symmetric, so the
# previous double loop over all i != j printed every pair twice.
for(i in 1:(nrow(cor_test$r) - 1)) {
  for(j in (i + 1):ncol(cor_test$r)) {
    cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}
# Also print significant correlations summary.
# Fixed: use upper.tri() so the diagonal is excluded structurally — the old
# `p != 0` test relied on the diagonal p being exactly 0 and also reported
# each significant pair twice.
sig_cors <- which(upper.tri(cor_test$p) & cor_test$p < 0.05, arr.ind = TRUE)
if(nrow(sig_cors) > 0) {
  for(i in 1:nrow(sig_cors)) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
}
# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,105 @@
# Correlation analysis for EOHI study 1: descriptives, dummy/ordinal coding,
# and a Spearman correlation matrix (with psych::corr.test p-values) over the
# key numeric variables; results are exported to CSV.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the analysis variables and drop respondents who declined to report sex.
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))
####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
# Print correlation matrix
print(round(cor_matrix, 3))
# Get significance tests for correlations using psych package
library(psych)
# Create correlation matrix with significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# Print correlation matrix
print(round(cor_test$r, 3))
# Print p-values
print(round(cor_test$p, 3))
# Print all correlations with r and p values (for reporting).
# Fixed: iterate the upper triangle only — the matrix is symmetric, so the
# previous double loop over all i != j printed every pair twice.
for(i in 1:(nrow(cor_test$r) - 1)) {
  for(j in (i + 1):ncol(cor_test$r)) {
    cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}
# Also print significant correlations summary.
# Fixed: use upper.tri() so the diagonal is excluded structurally — the old
# `p != 0` test relied on the diagonal p being exactly 0 and also reported
# each significant pair twice.
sig_cors <- which(upper.tri(cor_test$p) & cor_test$p < 0.05, arr.ind = TRUE)
if(nrow(sig_cors) > 0) {
  for(i in 1:nrow(sig_cors)) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
}
# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)
print("Correlation matrix saved to correlation_matrix.csv")
print("P-values saved to correlation_pvalues.csv")

View File

@ -0,0 +1,103 @@
# Correlation analysis for EOHI study 1: descriptives, dummy/ordinal coding,
# and a Spearman correlation matrix (with psych::corr.test p-values) over the
# key numeric variables; results are exported to CSV.
options(scipen = 999)
library(dplyr)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
df <- read.csv("ehi1.csv")
# Keep the analysis variables and drop respondents who declined to report sex.
data <- df %>%
  select(eohiDGEN_mean, ehi_global_mean, demo_sex, demo_age_1, edu3, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global) %>%
  filter(demo_sex != "Prefer not to say")
print(colSums(is.na(data)))
print(sapply(data, class))
# Create dummy variable for sex (0 = Male, 1 = Female)
data$sex_dummy <- ifelse(data$demo_sex == "Female", 1, 0)
# Verify the dummy coding
print(table(data$demo_sex, data$sex_dummy))
#descriptives
# Descriptives for age
print(summary(data$demo_age_1))
print(sd(data$demo_age_1, na.rm = TRUE))
# Center demo_age_1 (subtract the mean)
data$age_centered <- data$demo_age_1 - mean(data$demo_age_1, na.rm = TRUE)
# Verify the centering
print(summary(data$age_centered))
# Descriptives for sex (frequency table)
print(table(data$demo_sex))
print(prop.table(table(data$demo_sex)))
# Descriptives for sex dummy variable
print(table(data$sex_dummy))
# Convert edu3 to numeric factor for correlations (1, 2, 3)
# First ensure edu3 is a factor, then convert to numeric
data$edu3 <- factor(data$edu3, levels = c("HS_TS", "C_Ug", "grad_prof"), ordered = TRUE)
data$edu_num <- as.numeric(data$edu3)
# Check the numeric conversion
print(table(data$edu_num, useNA = "ifany"))
# Verify the conversion
print(table(data$edu3, data$edu_num, useNA = "ifany"))
####correlation matrix ####
# Select numeric variables for correlation matrix
numeric_vars <- data %>%
  select(eohiDGEN_mean, ehi_global_mean, sex_dummy, demo_age_1, edu_num, AOT_total, CRT_correct, CRT_int, bs_28, bs_easy, bs_hard, cal_selfActual, cal_global)
# Create Spearman correlation matrix
cor_matrix <- cor(numeric_vars, use = "complete.obs", method = "spearman")
# Print correlation matrix
print(round(cor_matrix, 3))
# Get significance tests for correlations using psych package
library(psych)
# Create correlation matrix with significance tests
cor_test <- corr.test(numeric_vars, method = "spearman", adjust = "none")
# Print correlation matrix
print(round(cor_test$r, 3))
# Print p-values
print(round(cor_test$p, 3))
# Print all correlations with r and p values (for reporting).
# Fixed: iterate the upper triangle only — the matrix is symmetric, so the
# previous double loop over all i != j printed every pair twice.
for(i in 1:(nrow(cor_test$r) - 1)) {
  for(j in (i + 1):ncol(cor_test$r)) {
    cat(colnames(numeric_vars)[i], "vs", colnames(numeric_vars)[j],
        ": r =", round(cor_test$r[i, j], 3),
        ", p =", round(cor_test$p[i, j], 3), "\n")
  }
}
# Also print significant correlations summary.
# Fixed: use upper.tri() so the diagonal is excluded structurally — the old
# `p != 0` test relied on the diagonal p being exactly 0 and also reported
# each significant pair twice.
sig_cors <- which(upper.tri(cor_test$p) & cor_test$p < 0.05, arr.ind = TRUE)
if(nrow(sig_cors) > 0) {
  for(i in 1:nrow(sig_cors)) {
    row_idx <- sig_cors[i, 1]
    col_idx <- sig_cors[i, 2]
    cat(colnames(numeric_vars)[row_idx], "vs", colnames(numeric_vars)[col_idx],
        ": r =", round(cor_test$r[row_idx, col_idx], 3),
        ", p =", round(cor_test$p[row_idx, col_idx], 3), "\n")
  }
}
# Save correlation matrix and p-values to CSV files
write.csv(cor_test$r, "correlation_matrix.csv", row.names = TRUE)
write.csv(cor_test$p, "correlation_pvalues.csv", row.names = TRUE)

View File

@ -0,0 +1,67 @@
# Pearson correlations between Brier scores and the EOHI / calibration
# measures, computed pairwise-complete, with optional wide-format r tables.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the experiment data, then drop any column that is entirely NA.
df1 <- read.csv("exp1.csv")
df1 <- dplyr::select(df1, where(~ !all(is.na(.))))
# --- Brier score correlations vs EOHIs and Calibration ---
# Variable name sets
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Tidy Pearson correlation table for every (x, y) pair, using only rows
# where both values are finite; a pair with fewer than 3 usable rows gets NA.
corr_tidy <- function(df, x_vars, y_vars) {
  pair_grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  out <- purrr::pmap_dfr(pair_grid, function(x, y) {
    left <- df[[x]]
    right <- df[[y]]
    usable <- is.finite(left) & is.finite(right)
    n_pairs <- sum(usable)
    if (n_pairs < 3) {
      tibble::tibble(var_x = x, var_y = y, n = n_pairs, r = NA_real_, p = NA_real_)
    } else {
      fit <- suppressWarnings(cor.test(left[usable], right[usable], method = "pearson"))
      tibble::tibble(var_x = x, var_y = y, n = n_pairs, r = unname(fit$estimate), p = fit$p.value)
    }
  })
  dplyr::arrange(out, var_x, var_y)
}
# Compute the two correlation tables
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
# Wide r-only tables (optional)
to_wide <- function(long_tbl) {
  tidyr::pivot_wider(long_tbl, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display results
print("Correlations: Brier vs EOHIs (r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (r, p, n)")
print(corr_bs_cal)
# If you want to export CSVs, uncomment:
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)

View File

@ -0,0 +1,67 @@
# Pearson correlations between Brier scores and the EOHI / calibration
# measures, computed pairwise-complete, with optional wide-format r tables.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Read the experiment data, then drop any column that is entirely NA.
df1 <- read.csv("exp1.csv")
df1 <- dplyr::select(df1, where(~ !all(is.na(.))))
# --- Brier score correlations vs EOHIs and Calibration ---
# Variable name sets
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Tidy Pearson correlation table for every (x, y) pair, using only rows
# where both values are finite; a pair with fewer than 3 usable rows gets NA.
corr_tidy <- function(df, x_vars, y_vars) {
  pair_grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  out <- purrr::pmap_dfr(pair_grid, function(x, y) {
    left <- df[[x]]
    right <- df[[y]]
    usable <- is.finite(left) & is.finite(right)
    n_pairs <- sum(usable)
    if (n_pairs < 3) {
      tibble::tibble(var_x = x, var_y = y, n = n_pairs, r = NA_real_, p = NA_real_)
    } else {
      fit <- suppressWarnings(cor.test(left[usable], right[usable], method = "pearson"))
      tibble::tibble(var_x = x, var_y = y, n = n_pairs, r = unname(fit$estimate), p = fit$p.value)
    }
  })
  dplyr::arrange(out, var_x, var_y)
}
# Compute the two correlation tables
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
# Wide r-only tables (optional)
to_wide <- function(long_tbl) {
  tidyr::pivot_wider(long_tbl, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display results
print("Correlations: Brier vs EOHIs (r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (r, p, n)")
print(corr_bs_cal)
# If you want to export CSVs, uncomment:
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)

View File

@ -0,0 +1,74 @@
# Pearson correlations between Brier scores and the EOHI / calibration
# measures, computed pairwise-complete on a pre-selected column subset.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets for the analysis.
# Fixed: these were previously defined twice with identical values (once
# before and once after the select() call); the redundant second set of
# definitions has been removed.
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Helper: tidy correlation (Pearson), pairwise complete; pairs with fewer
# than 3 finite observations get NA for r and p.
corr_tidy <- function(df, x_vars, y_vars) {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (r, p, n)")
print(corr_bs_cal)
# If you want to export CSVs, uncomment:
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)

View File

@ -0,0 +1,74 @@
# Pearson correlations between Brier scores and the EOHI / calibration
# measures, computed pairwise-complete on a pre-selected column subset.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets for the analysis.
# Fixed: these were previously defined twice with identical values (once
# before and once after the select() call); the redundant second set of
# definitions has been removed.
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Helper: tidy correlation (Pearson), pairwise complete; pairs with fewer
# than 3 finite observations get NA for r and p.
corr_tidy <- function(df, x_vars, y_vars) {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (r, p, n)")
print(corr_bs_cal)
# If you want to export CSVs, uncomment:
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)

View File

@ -0,0 +1,74 @@
# Pearson correlations between Brier scores and the EOHI / calibration
# measures, computed pairwise-complete on a pre-selected column subset.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets for the analysis.
# Fixed: these were previously defined twice with identical values (once
# before and once after the select() call); the redundant second set of
# definitions has been removed.
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Helper: tidy correlation (Pearson), pairwise complete; pairs with fewer
# than 3 finite observations get NA for r and p.
corr_tidy <- function(df, x_vars, y_vars) {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (r, p, n)")
print(corr_bs_cal)
# If you want to export CSVs, uncomment:
# write.csv(corr_bs_eohi, "corr_bs_eohi.csv", row.names = FALSE)
# write.csv(corr_bs_cal, "corr_bs_cal.csv", row.names = FALSE)
# write.csv(wide_bs_eohi, "corr_bs_eohi_wide.csv", row.names = FALSE)
# write.csv(wide_bs_cal, "corr_bs_cal_wide.csv", row.names = FALSE)

View File

@ -0,0 +1,75 @@
# Pearson correlations between Brier scores and the EOHI / calibration
# measures, pairwise-complete, with both result sets combined into one CSV.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets for the analysis.
# Fixed: these were previously defined twice with identical values (once
# before and once after the select() call); the redundant second set of
# definitions has been removed.
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Helper: tidy correlation (Pearson), pairwise complete; pairs with fewer
# than 3 finite observations get NA for r and p.
corr_tidy <- function(df, x_vars, y_vars) {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (r, p, n)")
print(corr_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,75 @@
# Pearson correlations between Brier scores and the EOHI / calibration
# measures, pairwise-complete, with both result sets combined into one CSV.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets for the analysis.
# Fixed: these were previously defined twice with identical values (once
# before and once after the select() call); the redundant second set of
# definitions has been removed.
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Helper: tidy correlation (Pearson), pairwise complete; pairs with fewer
# than 3 finite observations get NA for r and p.
corr_tidy <- function(df, x_vars, y_vars) {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (r, p, n)")
print(corr_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,75 @@
# Pearson correlations between Brier scores and the EOHI / calibration
# measures, pairwise-complete, with both result sets combined into one CSV.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets for the analysis.
# Fixed: these were previously defined twice with identical values (once
# before and once after the select() call); the redundant second set of
# definitions has been removed.
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Helper: tidy correlation (Pearson), pairwise complete; pairs with fewer
# than 3 finite observations get NA for r and p.
corr_tidy <- function(df, x_vars, y_vars) {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = "pearson"))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars)
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars)
# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (r, p, n)")
print(corr_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# Brier score correlations vs EOHI and calibration measures, computed with
# both Pearson and Spearman methods (pairwise-complete), combined into one CSV.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets for the analysis.
# Fixed: these were previously defined twice with identical values (once
# before and once after the select() call); the redundant second set of
# definitions has been removed.
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise
# complete; pairs with fewer than 3 finite observations get NA for r and p.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# Brier score correlations vs EOHI and calibration measures, computed with
# both Pearson and Spearman methods (pairwise-complete), combined into one CSV.
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets for the analysis.
# Fixed: these were previously defined twice with identical values (once
# before and once after the select() call); the redundant second set of
# definitions has been removed.
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise
# complete; pairs with fewer than 3 finite observations get NA for r and p.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# Wide r-only tables (optional)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables.
# Reads exp1.csv, computes Pearson and Spearman correlations on
# pairwise-complete cases, prints tidy tables, and writes a single
# combined CSV (corr_bs_all.csv).
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables.
# Reads exp1.csv, computes Pearson and Spearman correlations on
# pairwise-complete cases, prints tidy tables, and writes a single
# combined CSV (corr_bs_all.csv).
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables.
# Reads exp1.csv, computes Pearson and Spearman correlations on
# pairwise-complete cases, prints tidy tables, and writes a single
# combined CSV (corr_bs_all.csv).
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables.
# Reads ehi1.csv, computes Pearson and Spearman correlations on
# pairwise-complete cases, prints tidy tables, and writes a single
# combined CSV (corr_bs_all.csv).
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohi_pref","eohi_pers","eohi_val","eohi_life","eohi_mean",
  "eohiDGEN_pref","eohiDGEN_pers","eohiDGEN_val","eohiDGEN_life","eohiDGEN_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables.
# Reads ehi1.csv, computes Pearson and Spearman correlations on
# pairwise-complete cases, prints tidy tables, and writes a single
# combined CSV (corr_bs_all.csv).
# BUG FIX: the original subset df1 to the eohiDGEN_*/ehi_*_mean columns,
# then redefined eohi_vars to eohi_* columns that had just been dropped,
# so corr_tidy would have indexed missing columns (df[[x]] -> NULL).
# eohi_vars is now defined once, matching the selected columns.
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Variable sets, defined once
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables.
# Reads ehi1.csv, computes Pearson and Spearman correlations on
# pairwise-complete cases, prints tidy tables, and writes a single
# combined CSV (corr_bs_all.csv).
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,86 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables.
# Reads ehi1.csv, computes Pearson and Spearman correlations on
# pairwise-complete cases, prints tidy tables, and writes a single
# combined CSV (corr_bs_all.csv).
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Pearson)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "pearson")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "pearson")
# Compute correlations (Spearman)
corr_s_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_s_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Pearson r, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Pearson r, p, n)")
print(corr_bs_cal)
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_s_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_s_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_s_bs_eohi$group <- "EOHI"
corr_s_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_s_bs_eohi, corr_s_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,76 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables
# (Spearman only). Reads ehi1.csv, computes rho on pairwise-complete
# cases, prints tidy tables, and writes corr_bs_all.csv.
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,76 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables
# (Spearman only). Reads ehi1.csv, computes rho on pairwise-complete
# cases, prints tidy tables, and writes corr_bs_all.csv.
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,76 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables
# (Spearman only). Reads ehi1.csv, computes rho on pairwise-complete
# cases, prints tidy tables, and writes corr_bs_all.csv.
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,76 @@
# ------------------------------------------------------------------
# Brier-score correlations vs EOHI and calibration variables
# (Spearman only). Reads ehi1.csv, computes rho on pairwise-complete
# cases, prints tidy tables, and writes corr_bs_all.csv.
# ------------------------------------------------------------------
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)  # no scientific notation in printed output
# NOTE(review): hardcoded absolute path -- confirm on the target machine
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Variable sets, defined once (the original re-declared the same three
# vectors a second time after the select; that redundancy is removed)
bs_vars <- c("bs_28", "bs_easy", "bs_hard")   # Brier scores
eohi_vars <- c(
  "eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
  "ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")  # calibration indices
# Keep only required columns for the analysis
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# corr_tidy: tidy correlation table for every x/y pair.
#   df:     data frame holding the columns
#   x_vars: character vector of x-side column names
#   y_vars: character vector of y-side column names
#   method: "pearson" or "spearman" (forwarded to cor.test)
# Uses pairwise-complete cases (NA/NaN/Inf dropped per pair); returns a
# tibble with var_x, var_y, n, r, p, method, sorted by var_x, var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  results <- purrr::pmap_dfr(grid, function(x, y) {
    xv <- df[[x]]; yv <- df[[y]]
    ok <- is.finite(xv) & is.finite(yv)
    if (sum(ok) < 3) {
      # cor.test needs >= 3 complete pairs; emit an NA row instead of erroring
      return(tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = NA_real_, p = NA_real_, method = method))
    }
    ct <- suppressWarnings(cor.test(xv[ok], yv[ok], method = method))
    tibble::tibble(var_x = x, var_y = y, n = sum(ok), r = unname(ct$estimate), p = ct$p.value, method = method)
  })
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
# to_wide: r-only matrix view (rows = var_x, columns = var_y)
to_wide <- function(d) {
  tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
# Export a single CSV combining both sets
corr_bs_eohi$group <- "EOHI"
corr_bs_cal$group <- "Calibration"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal) %>%
  dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,81 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Keep only required columns for the analysis
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Variables
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
# Tidy pairwise-complete correlations for every combination of x_vars x y_vars.
# method is passed straight to cor.test ("pearson" or "spearman"); pairs with
# fewer than 3 finite observations get r = NA, p = NA. Returns a tibble with
# columns var_x, var_y, n, r, p, method, sorted by var_x then var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  one_pair <- function(x, y) {
    xs <- df[[x]]
    ys <- df[[y]]
    usable <- is.finite(xs) & is.finite(ys)
    n_ok <- sum(usable)
    if (n_ok < 3) {
      # Not enough complete pairs for a meaningful test
      return(tibble::tibble(var_x = x, var_y = y, n = n_ok, r = NA_real_, p = NA_real_, method = method))
    }
    # suppressWarnings: Spearman with ties warns about exact p-values
    fit <- suppressWarnings(cor.test(xs[usable], ys[usable], method = method))
    tibble::tibble(var_x = x, var_y = y, n = n_ok, r = unname(fit$estimate), p = fit$p.value, method = method)
  }
  results <- purrr::pmap_dfr(grid, one_pair)
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
# Wide r-only tables (optional)
# Pivot a tidy correlation table to wide form: one row per var_x,
# one column per var_y, cells holding r.
to_wide <- function(d) {
  wide <- tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
  wide
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)
# Export a single CSV combining all sets
corr_bs_eohi$group <- "BS_vs_EOHI"
corr_bs_cal$group <- "BS_vs_Cal"
corr_cal_eohi$group <- "Cal_vs_EOHI"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,81 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Keep only required columns for the analysis
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Variables
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
# Tidy pairwise-complete correlations for every combination of x_vars x y_vars.
# method is passed straight to cor.test ("pearson" or "spearman"); pairs with
# fewer than 3 finite observations get r = NA, p = NA. Returns a tibble with
# columns var_x, var_y, n, r, p, method, sorted by var_x then var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  one_pair <- function(x, y) {
    xs <- df[[x]]
    ys <- df[[y]]
    usable <- is.finite(xs) & is.finite(ys)
    n_ok <- sum(usable)
    if (n_ok < 3) {
      # Not enough complete pairs for a meaningful test
      return(tibble::tibble(var_x = x, var_y = y, n = n_ok, r = NA_real_, p = NA_real_, method = method))
    }
    # suppressWarnings: Spearman with ties warns about exact p-values
    fit <- suppressWarnings(cor.test(xs[usable], ys[usable], method = method))
    tibble::tibble(var_x = x, var_y = y, n = n_ok, r = unname(fit$estimate), p = fit$p.value, method = method)
  }
  results <- purrr::pmap_dfr(grid, one_pair)
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
# Wide r-only tables (optional)
# Pivot a tidy correlation table to wide form: one row per var_x,
# one column per var_y, cells holding r.
to_wide <- function(d) {
  wide <- tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
  wide
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)
# Export a single CSV combining all sets
corr_bs_eohi$group <- "BS_vs_EOHI"
corr_bs_cal$group <- "BS_vs_Cal"
corr_cal_eohi$group <- "Cal_vs_EOHI"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,81 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("ehi1.csv")
# Keep only required columns for the analysis
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
df1 <- df1 %>% dplyr::select(dplyr::all_of(c(bs_vars, eohi_vars, cal_vars)))
# --- Brier score correlations vs EOHIs and Calibration ---
# Variables
bs_vars <- c("bs_28", "bs_easy", "bs_hard")
eohi_vars <- c(
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean",
"ehi_pref_mean", "ehi_pers_mean", "ehi_val_mean", "ehi_life_mean", "ehi_global_mean"
)
cal_vars <- c("cal_selfActual","cal_global")
# Helper: tidy correlation (method = "pearson" or "spearman"), pairwise complete
# Tidy pairwise-complete correlations for every combination of x_vars x y_vars.
# method is passed straight to cor.test ("pearson" or "spearman"); pairs with
# fewer than 3 finite observations get r = NA, p = NA. Returns a tibble with
# columns var_x, var_y, n, r, p, method, sorted by var_x then var_y.
corr_tidy <- function(df, x_vars, y_vars, method = "pearson") {
  grid <- expand.grid(x = x_vars, y = y_vars, stringsAsFactors = FALSE)
  one_pair <- function(x, y) {
    xs <- df[[x]]
    ys <- df[[y]]
    usable <- is.finite(xs) & is.finite(ys)
    n_ok <- sum(usable)
    if (n_ok < 3) {
      # Not enough complete pairs for a meaningful test
      return(tibble::tibble(var_x = x, var_y = y, n = n_ok, r = NA_real_, p = NA_real_, method = method))
    }
    # suppressWarnings: Spearman with ties warns about exact p-values
    fit <- suppressWarnings(cor.test(xs[usable], ys[usable], method = method))
    tibble::tibble(var_x = x, var_y = y, n = n_ok, r = unname(fit$estimate), p = fit$p.value, method = method)
  }
  results <- purrr::pmap_dfr(grid, one_pair)
  dplyr::arrange(results, var_x, var_y)
}
# Compute correlations (Spearman only)
corr_bs_eohi <- corr_tidy(df1, bs_vars, eohi_vars, method = "spearman")
corr_bs_cal <- corr_tidy(df1, bs_vars, cal_vars, method = "spearman")
corr_cal_eohi <- corr_tidy(df1, cal_vars, eohi_vars, method = "spearman")
# Wide r-only tables (optional)
# Pivot a tidy correlation table to wide form: one row per var_x,
# one column per var_y, cells holding r.
to_wide <- function(d) {
  wide <- tidyr::pivot_wider(d, id_cols = var_x, names_from = var_y, values_from = r)
  wide
}
wide_bs_eohi <- to_wide(corr_bs_eohi)
wide_bs_cal <- to_wide(corr_bs_cal)
wide_cal_eohi <- to_wide(corr_cal_eohi)
# Display
print("Correlations: Brier vs EOHIs (Spearman rho, p, n)")
print(corr_bs_eohi)
print("Correlations: Brier vs Calibration (Spearman rho, p, n)")
print(corr_bs_cal)
print("Correlations: Calibration vs EOHIs (Spearman rho, p, n)")
print(corr_cal_eohi)
# Export a single CSV combining all sets
corr_bs_eohi$group <- "BS_vs_EOHI"
corr_bs_cal$group <- "BS_vs_Cal"
corr_cal_eohi$group <- "Cal_vs_EOHI"
corr_all <- dplyr::bind_rows(corr_bs_eohi, corr_bs_cal, corr_cal_eohi) %>%
dplyr::relocate(group, .before = var_x)
write.csv(corr_all, "corr_bs_all.csv", row.names = FALSE)

View File

@ -0,0 +1,304 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
####==== DESCRIPTIVE STATISTICS ====
# Function to compute descriptive statistics
# Summarise each column named in `vars` with n, mean, sd, min, max, median,
# q25 and q75 (all NA-tolerant), returning one row per variable and one
# column per statistic.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
# across() produces one column per variable/statistic pair, named "<var>_<stat>"
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
# Reshape the single wide summary row into (variable, value) pairs
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
# Split on the LAST underscore so variable names containing "_" stay intact
# (e.g. "eohi_pref_mean" -> var "eohi_pref", stat "mean")
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
# One row per variable, one column per statistic
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Function to add significance stars
# Format a correlation matrix for display: values rounded to 5 decimals with
# significance stars appended from the matching p-value matrix
# (*** p < .001, ** p < .01, * p < .05). Returns a character matrix with the
# same dimnames as cor_mat.
corstars <- function(cor_mat, p_mat) {
  # rcorr() leaves NA p-values on the diagonal (and for degenerate pairs);
  # without this guard ifelse(NA < ...) propagates NA and paste0 renders a
  # literal "NA" suffix (e.g. "1.00000NA") in the table.
  stars <- ifelse(is.na(p_mat), "",
                  ifelse(p_mat < 0.001, "***",
                         ifelse(p_mat < 0.01, "**",
                                ifelse(p_mat < 0.05, "*", ""))))
  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))
  # Carry the variable names over to the formatted matrix
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)
  return(cor_with_stars)
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Function to compute correlation with bootstrap CI
# Correlation between data[[var1]] and data[[var2]] with a bootstrap percentile
# 95% CI (R resamples). Rows with NA in either variable are dropped; fewer than
# 3 complete rows returns all-NA estimates. Returns a one-row data.frame with
# columns correlation, ci_lower, ci_upper, n.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Remove missing values
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  if (nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each resample
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  # NOTE(review): set.seed here mutates the global RNG state as a side effect;
  # kept for reproducibility of the published results.
  set.seed(123)
  boot_results <- boot(complete_data, boot_fun, R = R)
  # boot.ci() errors or returns NULL intervals when the replicates are
  # degenerate (e.g. zero variance); guard so one bad pair does not abort the
  # whole grid of correlations.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if (is.null(ci) || is.null(ci$percent)) {
    return(data.frame(
      correlation = boot_results$t0,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Positions 4 and 5 of the "percent" component hold the CI endpoints
  data.frame(
    correlation = boot_results$t0,
    ci_lower = ci$percent[4],
    ci_upper = ci$percent[5],
    n = nrow(complete_data)
  )
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
####==== EFFECT SIZES (Cohen's conventions) ====
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")
# Categorize effect sizes
summary_table_with_effects <- summary_table %>%
mutate(
pearson_effect_size = case_when(
abs(pearson_r) >= 0.50 ~ "Large",
abs(pearson_r) >= 0.30 ~ "Medium",
abs(pearson_r) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
),
spearman_effect_size = case_when(
abs(spearman_rho) >= 0.50 ~ "Large",
abs(spearman_rho) >= 0.30 ~ "Medium",
abs(spearman_rho) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
)
)
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))

View File

@ -0,0 +1,304 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
####==== DESCRIPTIVE STATISTICS ====
# Function to compute descriptive statistics
# Summarise each column named in `vars` with n, mean, sd, min, max, median,
# q25 and q75 (all NA-tolerant), returning one row per variable and one
# column per statistic.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
# across() produces one column per variable/statistic pair, named "<var>_<stat>"
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
# Reshape the single wide summary row into (variable, value) pairs
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
# Split on the LAST underscore so variable names containing "_" stay intact
# (e.g. "eohi_pref_mean" -> var "eohi_pref", stat "mean")
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
# One row per variable, one column per statistic
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Function to add significance stars
# Format a correlation matrix for display: values rounded to 5 decimals with
# significance stars appended from the matching p-value matrix
# (*** p < .001, ** p < .01, * p < .05). Returns a character matrix with the
# same dimnames as cor_mat.
corstars <- function(cor_mat, p_mat) {
  # rcorr() leaves NA p-values on the diagonal (and for degenerate pairs);
  # without this guard ifelse(NA < ...) propagates NA and paste0 renders a
  # literal "NA" suffix (e.g. "1.00000NA") in the table.
  stars <- ifelse(is.na(p_mat), "",
                  ifelse(p_mat < 0.001, "***",
                         ifelse(p_mat < 0.01, "**",
                                ifelse(p_mat < 0.05, "*", ""))))
  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))
  # Carry the variable names over to the formatted matrix
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)
  return(cor_with_stars)
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Function to compute correlation with bootstrap CI
# Correlation between data[[var1]] and data[[var2]] with a bootstrap percentile
# 95% CI (R resamples). Rows with NA in either variable are dropped; fewer than
# 3 complete rows returns all-NA estimates. Returns a one-row data.frame with
# columns correlation, ci_lower, ci_upper, n.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Remove missing values
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  if (nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each resample
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  # NOTE(review): set.seed here mutates the global RNG state as a side effect;
  # kept for reproducibility of the published results.
  set.seed(123)
  boot_results <- boot(complete_data, boot_fun, R = R)
  # boot.ci() errors or returns NULL intervals when the replicates are
  # degenerate (e.g. zero variance); guard so one bad pair does not abort the
  # whole grid of correlations.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if (is.null(ci) || is.null(ci$percent)) {
    return(data.frame(
      correlation = boot_results$t0,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Positions 4 and 5 of the "percent" component hold the CI endpoints
  data.frame(
    correlation = boot_results$t0,
    ci_lower = ci$percent[4],
    ci_upper = ci$percent[5],
    n = nrow(complete_data)
  )
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
####==== EFFECT SIZES (Cohen's conventions) ====
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")
# Categorize effect sizes
summary_table_with_effects <- summary_table %>%
mutate(
pearson_effect_size = case_when(
abs(pearson_r) >= 0.50 ~ "Large",
abs(pearson_r) >= 0.30 ~ "Medium",
abs(pearson_r) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
),
spearman_effect_size = case_when(
abs(spearman_rho) >= 0.50 ~ "Large",
abs(spearman_rho) >= 0.30 ~ "Medium",
abs(spearman_rho) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
)
)
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))

View File

@ -0,0 +1,304 @@
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
options(scipen = 999)
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
####==== DESCRIPTIVE STATISTICS ====
# Function to compute descriptive statistics
# Summarise each column named in `vars` with n, mean, sd, min, max, median,
# q25 and q75 (all NA-tolerant), returning one row per variable and one
# column per statistic.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
# across() produces one column per variable/statistic pair, named "<var>_<stat>"
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
# Reshape the single wide summary row into (variable, value) pairs
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
# Split on the LAST underscore so variable names containing "_" stay intact
# (e.g. "eohi_pref_mean" -> var "eohi_pref", stat "mean")
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
# One row per variable, one column per statistic
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Function to add significance stars
# Format a correlation matrix for display: values rounded to 5 decimals with
# significance stars appended from the matching p-value matrix
# (*** p < .001, ** p < .01, * p < .05). Returns a character matrix with the
# same dimnames as cor_mat.
corstars <- function(cor_mat, p_mat) {
  # rcorr() leaves NA p-values on the diagonal (and for degenerate pairs);
  # without this guard ifelse(NA < ...) propagates NA and paste0 renders a
  # literal "NA" suffix (e.g. "1.00000NA") in the table.
  stars <- ifelse(is.na(p_mat), "",
                  ifelse(p_mat < 0.001, "***",
                         ifelse(p_mat < 0.01, "**",
                                ifelse(p_mat < 0.05, "*", ""))))
  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))
  # Carry the variable names over to the formatted matrix
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)
  return(cor_with_stars)
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Function to compute correlation with bootstrap CI
# Correlation between data[[var1]] and data[[var2]] with a bootstrap percentile
# 95% CI (R resamples). Rows with NA in either variable are dropped; fewer than
# 3 complete rows returns all-NA estimates. Returns a one-row data.frame with
# columns correlation, ci_lower, ci_upper, n.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Remove missing values
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  if (nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each resample
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  # NOTE(review): set.seed here mutates the global RNG state as a side effect;
  # kept for reproducibility of the published results.
  set.seed(123)
  boot_results <- boot(complete_data, boot_fun, R = R)
  # boot.ci() errors or returns NULL intervals when the replicates are
  # degenerate (e.g. zero variance); guard so one bad pair does not abort the
  # whole grid of correlations.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if (is.null(ci) || is.null(ci$percent)) {
    return(data.frame(
      correlation = boot_results$t0,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Positions 4 and 5 of the "percent" component hold the CI endpoints
  data.frame(
    correlation = boot_results$t0,
    ci_lower = ci$percent[4],
    ci_upper = ci$percent[5],
    n = nrow(complete_data)
  )
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
####==== EFFECT SIZES (Cohen's conventions) ====
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")
# Categorize effect sizes
summary_table_with_effects <- summary_table %>%
mutate(
pearson_effect_size = case_when(
abs(pearson_r) >= 0.50 ~ "Large",
abs(pearson_r) >= 0.30 ~ "Medium",
abs(pearson_r) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
),
spearman_effect_size = case_when(
abs(spearman_rho) >= 0.50 ~ "Large",
abs(spearman_rho) >= 0.30 ~ "Medium",
abs(spearman_rho) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
)
)
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))

View File

@ -0,0 +1,306 @@
# ============================================================
# EOHI x Calibration correlation analysis — setup & descriptives.
# Loads the experiment-1 dataset, restricts it to the EOHI and
# calibration measures, and prints descriptive statistics.
# ============================================================
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
# Suppress scientific notation in printed output
options(scipen = 999)
# Load data
# NOTE(review): assumes exp1.csv is in the current working directory — confirm.
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
# (the subscripting below errors if any of these columns is missing from exp1.csv)
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
# (non-numeric text becomes NA with a coercion warning)
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
# (listwise deletion: only rows complete on ALL selected variables are kept)
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Function to compute descriptive statistics
# data: data frame; vars: character vector of column names to summarise.
# Returns a tibble with one row per variable and columns
# n, mean, sd, min, max, median, q25, q75 (stats rounded to 5 dp).
# The separate() regex splits on the LAST underscore only, so variable
# names that themselves contain underscores (e.g. "eohiDGEN_pref") survive.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
# rcorr() returns $r (correlations), $P (p-values; diagonal left NA), $n
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Function to add significance stars to a correlation matrix.
# cor_mat: numeric correlation matrix; p_mat: matching matrix of p-values
#   (Hmisc::rcorr() leaves the diagonal of $P as NA).
# Returns a character matrix of "r[stars]" strings with cor_mat's dimnames.
corstars <- function(cor_mat, p_mat) {
  stars <- ifelse(p_mat < 0.001, "***",
                  ifelse(p_mat < 0.01, "**",
                         ifelse(p_mat < 0.05, "*", "")))
  # rcorr() returns NA p-values on the diagonal; without this the table
  # would display entries like "1.00000NA".
  stars[is.na(stars)] <- ""
  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))
  # Preserve the variable labels from the input matrix
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)
  return(cor_with_stars)
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARson CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
# (row/column subsetting keeps dimnames, which label the report below)
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
# Print one "var x var: r = ..., p = ..." line per EOHI/calibration pair
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
# Conventional significance stars: *** < .001, ** < .01, * < .05
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values (diagonal is NA by rcorr() convention)
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
# Same per-pair report as above, but for Spearman's rho
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Compute a correlation between two columns of `data` together with a
# percentile-bootstrap 95% confidence interval.
#   data:       data frame containing var1 and var2
#   var1, var2: column names (character)
#   method:     correlation type passed to cor() ("pearson" or "spearman")
#   R:          number of bootstrap resamples
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Use only rows where both variables are observed (pairwise deletion)
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  # A correlation (let alone a bootstrap CI) is meaningless below n = 3
  if(nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each bootstrap resample
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  # Perform bootstrap
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)
  # Calculate confidence interval.
  # boot.ci() errors (or returns NULL) when the statistic is degenerate —
  # e.g. a constant column makes every resampled correlation NA — which
  # would otherwise abort the whole pmap loop. Fall back to NA limits.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if(is.null(ci) || is.null(ci$perc)) {
    return(data.frame(
      correlation = boot_results$t0,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  return(data.frame(
    correlation = boot_results$t0,
    ci_lower = ci$perc[4],  # percentile interval lower bound
    ci_upper = ci$perc[5],  # percentile interval upper bound
    n = nrow(complete_data)
  ))
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
# expand.grid builds every EOHI x calibration pair; pmap_dfr runs the
# bootstrap once per pair and row-binds the one-row results.
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
# Pairwise-complete handling happens inside bootstrap_correlation()
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
# Same grid of pairs, bootstrapped with Spearman's rho
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
# One row per EOHI x calibration pair: bootstrap r/rho with CIs plus the
# rcorr() p-values, joined on the pair identifiers.
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
# NOTE(review): these p-values come from the listwise-complete rcorr()
# matrices while the bootstrap uses pairwise-complete data — the two can
# be based on different n; confirm this is intended.
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
####==== EFFECT SIZES (Cohen's conventions) ====
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")
# Categorize effect sizes
# case_when picks the FIRST matching band, so the large -> small order matters
summary_table_with_effects <- summary_table %>%
mutate(
pearson_effect_size = case_when(
abs(pearson_r) >= 0.50 ~ "Large",
abs(pearson_r) >= 0.30 ~ "Medium",
abs(pearson_r) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
),
spearman_effect_size = case_when(
abs(spearman_rho) >= 0.50 ~ "Large",
abs(spearman_rho) >= 0.30 ~ "Medium",
abs(spearman_rho) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
)
)
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))

View File

@ -0,0 +1,307 @@
# ============================================================
# EOHI x Calibration correlation analysis — setup & descriptives.
# Loads the experiment-1 dataset, restricts it to the EOHI and
# calibration measures, and prints descriptive statistics.
# ============================================================
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
# Suppress scientific notation in printed output
options(scipen = 999)
# NOTE(review): hard-coded absolute path — breaks on any other machine;
# consider a project-relative path instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
# (the subscripting below errors if any of these columns is missing from exp1.csv)
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
# (non-numeric text becomes NA with a coercion warning)
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
# (listwise deletion: only rows complete on ALL selected variables are kept)
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Function to compute descriptive statistics
# data: data frame; vars: character vector of column names to summarise.
# Returns a tibble with one row per variable and columns
# n, mean, sd, min, max, median, q25, q75 (stats rounded to 5 dp).
# The separate() regex splits on the LAST underscore only, so variable
# names that themselves contain underscores (e.g. "eohiDGEN_pref") survive.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
# rcorr() returns $r (correlations), $P (p-values; diagonal left NA), $n
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Function to add significance stars to a correlation matrix.
# cor_mat: numeric correlation matrix; p_mat: matching matrix of p-values
#   (Hmisc::rcorr() leaves the diagonal of $P as NA).
# Returns a character matrix of "r[stars]" strings with cor_mat's dimnames.
corstars <- function(cor_mat, p_mat) {
  stars <- ifelse(p_mat < 0.001, "***",
                  ifelse(p_mat < 0.01, "**",
                         ifelse(p_mat < 0.05, "*", "")))
  # rcorr() returns NA p-values on the diagonal; without this the table
  # would display entries like "1.00000NA".
  stars[is.na(stars)] <- ""
  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))
  # Preserve the variable labels from the input matrix
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)
  return(cor_with_stars)
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
# (row/column subsetting keeps dimnames, which label the report below)
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
# Print one "var x var: r = ..., p = ..." line per EOHI/calibration pair
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
# Conventional significance stars: *** < .001, ** < .01, * < .05
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values (diagonal is NA by rcorr() convention)
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
# Same per-pair report as above, but for Spearman's rho
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Compute a correlation between two columns of `data` together with a
# percentile-bootstrap 95% confidence interval.
#   data:       data frame containing var1 and var2
#   var1, var2: column names (character)
#   method:     correlation type passed to cor() ("pearson" or "spearman")
#   R:          number of bootstrap resamples
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Use only rows where both variables are observed (pairwise deletion)
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  # A correlation (let alone a bootstrap CI) is meaningless below n = 3
  if(nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each bootstrap resample
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  # Perform bootstrap
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)
  # Calculate confidence interval.
  # boot.ci() errors (or returns NULL) when the statistic is degenerate —
  # e.g. a constant column makes every resampled correlation NA — which
  # would otherwise abort the whole pmap loop. Fall back to NA limits.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if(is.null(ci) || is.null(ci$perc)) {
    return(data.frame(
      correlation = boot_results$t0,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  return(data.frame(
    correlation = boot_results$t0,
    ci_lower = ci$perc[4],  # percentile interval lower bound
    ci_upper = ci$perc[5],  # percentile interval upper bound
    n = nrow(complete_data)
  ))
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
# expand.grid builds every EOHI x calibration pair; pmap_dfr runs the
# bootstrap once per pair and row-binds the one-row results.
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
# Pairwise-complete handling happens inside bootstrap_correlation()
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
# Same grid of pairs, bootstrapped with Spearman's rho
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
# One row per EOHI x calibration pair: bootstrap r/rho with CIs plus the
# rcorr() p-values, joined on the pair identifiers.
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
# NOTE(review): these p-values come from the listwise-complete rcorr()
# matrices while the bootstrap uses pairwise-complete data — the two can
# be based on different n; confirm this is intended.
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
####==== EFFECT SIZES (Cohen's conventions) ====
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")
# Categorize effect sizes
# case_when picks the FIRST matching band, so the large -> small order matters
summary_table_with_effects <- summary_table %>%
mutate(
pearson_effect_size = case_when(
abs(pearson_r) >= 0.50 ~ "Large",
abs(pearson_r) >= 0.30 ~ "Medium",
abs(pearson_r) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
),
spearman_effect_size = case_when(
abs(spearman_rho) >= 0.50 ~ "Large",
abs(spearman_rho) >= 0.30 ~ "Medium",
abs(spearman_rho) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
)
)
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))

View File

@ -0,0 +1,307 @@
# ============================================================
# EOHI x Calibration correlation analysis — setup & descriptives.
# Loads the experiment-1 dataset, restricts it to the EOHI and
# calibration measures, and prints descriptive statistics.
# ============================================================
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
# Suppress scientific notation in printed output
options(scipen = 999)
# NOTE(review): hard-coded absolute path — breaks on any other machine;
# consider a project-relative path instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
# (the subscripting below errors if any of these columns is missing from exp1.csv)
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
# (non-numeric text becomes NA with a coercion warning)
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
# (listwise deletion: only rows complete on ALL selected variables are kept)
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Function to compute descriptive statistics
# data: data frame; vars: character vector of column names to summarise.
# Returns a tibble with one row per variable and columns
# n, mean, sd, min, max, median, q25, q75 (stats rounded to 5 dp).
# The separate() regex splits on the LAST underscore only, so variable
# names that themselves contain underscores (e.g. "eohiDGEN_pref") survive.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
# rcorr() returns $r (correlations), $P (p-values; diagonal left NA), $n
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Function to add significance stars to a correlation matrix.
# cor_mat: numeric correlation matrix; p_mat: matching matrix of p-values
#   (Hmisc::rcorr() leaves the diagonal of $P as NA).
# Returns a character matrix of "r[stars]" strings with cor_mat's dimnames.
corstars <- function(cor_mat, p_mat) {
  stars <- ifelse(p_mat < 0.001, "***",
                  ifelse(p_mat < 0.01, "**",
                         ifelse(p_mat < 0.05, "*", "")))
  # rcorr() returns NA p-values on the diagonal; without this the table
  # would display entries like "1.00000NA".
  stars[is.na(stars)] <- ""
  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))
  # Preserve the variable labels from the input matrix
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)
  return(cor_with_stars)
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
# (row/column subsetting keeps dimnames, which label the report below)
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
# Print one "var x var: r = ..., p = ..." line per EOHI/calibration pair
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
# Conventional significance stars: *** < .001, ** < .01, * < .05
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values (diagonal is NA by rcorr() convention)
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
# Same per-pair report as above, but for Spearman's rho
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Compute a correlation between two columns of `data` together with a
# percentile-bootstrap 95% confidence interval.
#   data:       data frame containing var1 and var2
#   var1, var2: column names (character)
#   method:     correlation type passed to cor() ("pearson" or "spearman")
#   R:          number of bootstrap resamples
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Use only rows where both variables are observed (pairwise deletion)
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  # A correlation (let alone a bootstrap CI) is meaningless below n = 3
  if(nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each bootstrap resample
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  # Perform bootstrap
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)
  # Calculate confidence interval.
  # boot.ci() errors (or returns NULL) when the statistic is degenerate —
  # e.g. a constant column makes every resampled correlation NA — which
  # would otherwise abort the whole pmap loop. Fall back to NA limits.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if(is.null(ci) || is.null(ci$perc)) {
    return(data.frame(
      correlation = boot_results$t0,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  return(data.frame(
    correlation = boot_results$t0,
    ci_lower = ci$perc[4],  # percentile interval lower bound
    ci_upper = ci$perc[5],  # percentile interval upper bound
    n = nrow(complete_data)
  ))
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
# expand.grid builds every EOHI x calibration pair; pmap_dfr runs the
# bootstrap once per pair and row-binds the one-row results.
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
# Pairwise-complete handling happens inside bootstrap_correlation()
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
# Same grid of pairs, bootstrapped with Spearman's rho
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
# One row per EOHI x calibration pair: bootstrap r/rho with CIs plus the
# rcorr() p-values, joined on the pair identifiers.
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
# NOTE(review): these p-values come from the listwise-complete rcorr()
# matrices while the bootstrap uses pairwise-complete data — the two can
# be based on different n; confirm this is intended.
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
####==== EFFECT SIZES (Cohen's conventions) ====
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")
# Categorize effect sizes
# case_when picks the FIRST matching band, so the large -> small order matters
summary_table_with_effects <- summary_table %>%
mutate(
pearson_effect_size = case_when(
abs(pearson_r) >= 0.50 ~ "Large",
abs(pearson_r) >= 0.30 ~ "Medium",
abs(pearson_r) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
),
spearman_effect_size = case_when(
abs(spearman_rho) >= 0.50 ~ "Large",
abs(spearman_rho) >= 0.30 ~ "Medium",
abs(spearman_rho) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
)
)
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))

View File

@ -0,0 +1,307 @@
# ============================================================
# EOHI x Calibration correlation analysis — setup & descriptives.
# Extended variant: includes the per-bin and true/false calibration
# measures in addition to cal_selfActual / cal_global.
# ============================================================
# Load required libraries
library(Hmisc)
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)
library(tidyr)
library(tibble)
library(boot)
# Suppress scientific notation in printed output
options(scipen = 999)
# NOTE(review): hard-coded absolute path — breaks on any other machine;
# consider a project-relative path instead.
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest
# (the subscripting below errors if any of these columns is missing from exp1.csv)
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
# Create dataset with selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric
# (non-numeric text becomes NA with a coercion warning)
df <- df %>%
mutate(across(everything(), as.numeric))
# Remove rows with any missing values for correlation analysis
# (listwise deletion: only rows complete on ALL selected variables are kept)
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Function to compute descriptive statistics
# data: data frame; vars: character vector of column names to summarise.
# Returns a tibble with one row per variable and columns
# n, mean, sd, min, max, median, q25, q75 (stats rounded to 5 dp).
# The separate() regex splits on the LAST underscore only, so variable
# names that themselves contain underscores (e.g. "eohiDGEN_pref") survive.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute correlation matrix with p-values
# rcorr() returns $r (correlations), $P (p-values; diagonal left NA), $n
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Function to add significance stars to a correlation matrix.
# cor_mat: numeric correlation matrix; p_mat: matching matrix of p-values
#   (Hmisc::rcorr() leaves the diagonal of $P as NA).
# Returns a character matrix of "r[stars]" strings with cor_mat's dimnames.
corstars <- function(cor_mat, p_mat) {
  stars <- ifelse(p_mat < 0.001, "***",
                  ifelse(p_mat < 0.01, "**",
                         ifelse(p_mat < 0.05, "*", "")))
  # rcorr() returns NA p-values on the diagonal; without this the table
  # would display entries like "1.00000NA".
  stars[is.na(stars)] <- ""
  # Combine correlation values with stars, rounded to 5 decimal places
  cor_with_stars <- matrix(paste0(format(round(cor_mat, 5), nsmall = 5), stars),
                           nrow = nrow(cor_mat))
  # Preserve the variable labels from the input matrix
  rownames(cor_with_stars) <- rownames(cor_mat)
  colnames(cor_with_stars) <- colnames(cor_mat)
  return(cor_with_stars)
}
# Apply the function
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
# (row/column subsetting keeps dimnames, which label the report below)
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
# Print one "var x var: r = ..., p = ..." line per EOHI/calibration pair
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
# Conventional significance stars: *** < .001, ** < .01, * < .05
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute Spearman correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values (diagonal is NA by rcorr() convention)
p_matrix_spearman <- cor_results_spearman$P
# Apply the function
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
# Same per-pair report as above, but for Spearman's rho
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Compute a correlation between two columns of `data` together with a
# percentile-bootstrap 95% confidence interval.
#   data:       data frame containing var1 and var2
#   var1, var2: column names (character)
#   method:     correlation type passed to cor() ("pearson" or "spearman")
#   R:          number of bootstrap resamples
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Use only rows where both variables are observed (pairwise deletion)
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  # A correlation (let alone a bootstrap CI) is meaningless below n = 3
  if(nrow(complete_data) < 3) {
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic recomputed on each bootstrap resample
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  # Perform bootstrap
  set.seed(123) # for reproducibility
  boot_results <- boot(complete_data, boot_fun, R = R)
  # Calculate confidence interval.
  # boot.ci() errors (or returns NULL) when the statistic is degenerate —
  # e.g. a constant column makes every resampled correlation NA — which
  # would otherwise abort the whole pmap loop. Fall back to NA limits.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if(is.null(ci) || is.null(ci$perc)) {
    return(data.frame(
      correlation = boot_results$t0,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  return(data.frame(
    correlation = boot_results$t0,
    ci_lower = ci$perc[4],  # percentile interval lower bound
    ci_upper = ci$perc[5],  # percentile interval upper bound
    n = nrow(complete_data)
  ))
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
# expand.grid builds every EOHI x calibration pair; pmap_dfr runs the
# bootstrap once per pair and row-binds the one-row results.
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
# Pairwise-complete handling happens inside bootstrap_correlation()
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
# Same grid of pairs, bootstrapped with Spearman's rho
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table
# One row per EOHI x calibration pair: bootstrap r/rho with CIs plus the
# rcorr() p-values, joined on the pair identifiers.
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values
# NOTE(review): these p-values come from the listwise-complete rcorr()
# matrices while the bootstrap uses pairwise-complete data — the two can
# be based on different n; confirm this is intended.
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
####==== EFFECT SIZES (Cohen's conventions) ====
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")
# Categorize effect sizes
# case_when picks the FIRST matching band, so the large -> small order matters
summary_table_with_effects <- summary_table %>%
mutate(
pearson_effect_size = case_when(
abs(pearson_r) >= 0.50 ~ "Large",
abs(pearson_r) >= 0.30 ~ "Medium",
abs(pearson_r) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
),
spearman_effect_size = case_when(
abs(spearman_rho) >= 0.50 ~ "Large",
abs(spearman_rho) >= 0.30 ~ "Medium",
abs(spearman_rho) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
)
)
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))

View File

@ -0,0 +1,307 @@
# Load required libraries
library(Hmisc)   # rcorr(): correlation matrices with p-values
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)   # pmap_dfr() for iterating over variable pairs
library(tidyr)
library(tibble)
library(boot)    # boot()/boot.ci() for bootstrapped CIs
options(scipen = 999)  # suppress scientific notation in printed output
# NOTE(review): hard-coded user-specific path; breaks on other machines
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest: EOHI scale scores and calibration measures
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
# Create dataset restricted to the selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric (non-numeric entries become NA)
df <- df %>%
mutate(across(everything(), as.numeric))
# Listwise deletion: keep only rows complete on every selected variable
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Compute per-variable descriptive statistics (n, mean, sd, min, max,
# median, 25th/75th percentiles) for the columns `vars` of `data`.
# Returns a tibble with one row per variable, statistics rounded to 5 dp.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
# Result columns are named "<var>_<stat>"; reshape to long form...
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
# ...and split on the LAST underscore (lookahead regex) so variable
# names that themselves contain underscores stay intact
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables (full sample, NA handled per stat)
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute the full correlation matrix with p-values on complete cases.
# Hmisc::rcorr returns $r (coefficients), $P (p-values, NA on diagonal), $n.
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Format a correlation matrix as text, appending significance stars
# (*** p<.001, ** p<.01, * p<.05) taken from the matching p-value matrix.
# Values are rounded to 5 decimals; dimnames are carried over unchanged.
corstars <- function(cor_mat, p_mat) {
  # Nested ifelse works elementwise over the whole matrix and keeps
  # NA p-values (e.g. the diagonal from rcorr) as NA star labels.
  star_labels <- ifelse(p_mat < 0.05,
                        ifelse(p_mat < 0.01,
                               ifelse(p_mat < 0.001, "***", "**"),
                               "*"),
                        "")
  cell_text <- paste0(format(round(cor_mat, 5), nsmall = 5), star_labels)
  annotated <- matrix(cell_text, nrow = nrow(cor_mat),
                      dimnames = dimnames(cor_mat))
  annotated
}
# Annotate the full Pearson matrix with significance stars and print it
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Restrict attention to the EOHI-by-calibration sub-matrix
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
# One line per pair: "var1 x var2: r = ...<stars>, p = ..."
for (row_nm in rownames(eohi_cal_correlations)) {
  for (col_nm in colnames(eohi_cal_correlations)) {
    r_val <- eohi_cal_correlations[row_nm, col_nm]
    p_val <- eohi_cal_pvalues[row_nm, col_nm]
    sig <- ifelse(p_val < 0.001, "***",
                  ifelse(p_val < 0.01, "**",
                         ifelse(p_val < 0.05, "*", "")))
    cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
                row_nm, col_nm, r_val, sig, p_val))
  }
}
####==== SPEARMAN CORRELATIONS ====
# Compute the rank-based (Spearman) correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values (NA on the diagonal)
p_matrix_spearman <- cor_results_spearman$P
# Apply the star-annotation helper defined above
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract the EOHI x calibration sub-matrix of correlations and p-values
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
# Print one line per pair: "var1 x var2: rho = ...<stars>, p = ..."
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Correlation between two columns plus a bootstrapped 95% percentile CI.
#
# Args:
#   data:   data frame containing at least columns `var1` and `var2`
#   var1:   name of the first column (character)
#   var2:   name of the second column (character)
#   method: correlation type passed to cor() ("pearson" or "spearman")
#   R:      number of bootstrap resamples
#
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
# CI bounds are NA when fewer than 3 complete cases exist or the
# bootstrap CI cannot be computed (e.g. a zero-variance column).
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Use only rows where both variables are observed (pairwise deletion)
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  if(nrow(complete_data) < 3) {
    # Too few observations for a meaningful correlation/CI
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic for boot(): correlation computed on the resampled rows
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  set.seed(123) # for reproducibility (note: resets the global RNG state)
  boot_results <- boot(complete_data, boot_fun, R = R)
  # boot.ci() errors or returns a NULL $perc element when the bootstrap
  # distribution is degenerate (all replicates identical, NA statistic);
  # fall back to NA bounds instead of aborting the whole pipeline.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if (is.null(ci) || is.null(ci$perc)) {
    ci_lower <- NA
    ci_upper <- NA
  } else {
    # Row layout of $perc: conf level, lower/upper order stats, lower, upper
    ci_lower <- ci$perc[4]
    ci_upper <- ci$perc[5]
  }
  return(data.frame(
    correlation = boot_results$t0,
    ci_lower = ci_lower,
    ci_upper = ci_upper,
    n = nrow(complete_data)
  ))
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
# (pairwise-complete data, R = 1000 resamples per pair)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
# One row per (eohi_var, cal_var) pair; pmap_dfr row-binds the results
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
# Round for display/export only
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
# Same grid of pairs, Spearman method
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table: one row per EOHI x calibration pair,
# combining Pearson and Spearman bootstrap estimates, their 95% CIs, and
# the rcorr() p-values computed earlier in the script.
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
# Attach the Spearman bootstrap results for the same variable pair
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values looked up by name from the rcorr() p-value matrices
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
# Round p-values for display/export only
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV in the current working directory (set by setwd above)
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")
####==== EFFECT SIZES (Cohen's conventions) ====
cat("\n=== EFFECT SIZE INTERPRETATION (Cohen's conventions) ===\n")
cat("Small effect: |r| = 0.10\n")
cat("Medium effect: |r| = 0.30\n")
cat("Large effect: |r| = 0.50\n")
# Categorize the magnitude of each correlation using Cohen's cutoffs.
# case_when evaluates conditions top-down, so the largest matching
# threshold wins; anything below 0.10 falls through to "Negligible".
summary_table_with_effects <- summary_table %>%
mutate(
pearson_effect_size = case_when(
abs(pearson_r) >= 0.50 ~ "Large",
abs(pearson_r) >= 0.30 ~ "Medium",
abs(pearson_r) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
),
spearman_effect_size = case_when(
abs(spearman_rho) >= 0.50 ~ "Large",
abs(spearman_rho) >= 0.30 ~ "Medium",
abs(spearman_rho) >= 0.10 ~ "Small",
TRUE ~ "Negligible"
)
)
cat("\n=== EFFECT SIZE CATEGORIZATION ===\n")
print(summary_table_with_effects %>% select(eohi_var, cal_var, pearson_r, pearson_effect_size, spearman_rho, spearman_effect_size))

View File

@ -0,0 +1,280 @@
# Load required libraries
library(Hmisc)   # rcorr(): correlation matrices with p-values
library(knitr)
library(dplyr)
library(corrr)
library(broom)
library(purrr)   # pmap_dfr() for iterating over variable pairs
library(tidyr)
library(tibble)
library(boot)    # boot()/boot.ci() for bootstrapped CIs
options(scipen = 999)  # suppress scientific notation in printed output
# NOTE(review): hard-coded user-specific path; breaks on other machines
setwd("C:/Users/irina/Documents/DND/EOHI/eohi1")
# Load data
df1 <- read.csv("exp1.csv")
# Remove columns with all NA values
df1 <- df1 %>% select(where(~ !all(is.na(.))))
# Select variables of interest: EOHI scale scores and calibration measures
eohi_vars <- c("eohi_pref", "eohi_pers", "eohi_val", "eohi_life", "eohi_mean",
"eohiDGEN_pref", "eohiDGEN_pers", "eohiDGEN_val", "eohiDGEN_life", "eohiDGEN_mean")
cal_vars <- c("cal_selfActual", "cal_global", "cal_15", "cal_35", "cal_55", "cal_75", "cal_true", "cal_false")
# Create dataset restricted to the selected variables
df <- df1[, c(eohi_vars, cal_vars)]
# Ensure all selected variables are numeric (non-numeric entries become NA)
df <- df %>%
mutate(across(everything(), as.numeric))
# Listwise deletion: keep only rows complete on every selected variable
df_complete <- df[complete.cases(df), ]
cat("Sample size for correlation analysis:", nrow(df_complete), "\n")
cat("Total sample size:", nrow(df), "\n")
str(df)
summary(df)
####==== DESCRIPTIVE STATISTICS ====
# Compute per-variable descriptive statistics (n, mean, sd, min, max,
# median, 25th/75th percentiles) for the columns `vars` of `data`.
# Returns a tibble with one row per variable, statistics rounded to 5 dp.
get_descriptives <- function(data, vars) {
desc_stats <- data %>%
select(all_of(vars)) %>%
summarise(across(everything(), list(
n = ~sum(!is.na(.)),
mean = ~mean(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
q25 = ~quantile(., 0.25, na.rm = TRUE),
q75 = ~quantile(., 0.75, na.rm = TRUE)
))) %>%
# Result columns are named "<var>_<stat>"; reshape to long form...
pivot_longer(everything(), names_to = "variable", values_to = "value") %>%
# ...and split on the LAST underscore (lookahead regex) so variable
# names that themselves contain underscores stay intact
separate(variable, into = c("var", "stat"), sep = "_(?=[^_]+$)") %>%
pivot_wider(names_from = stat, values_from = value) %>%
mutate(across(c(mean, sd, min, max, median, q25, q75), ~round(., 5)))
return(desc_stats)
}
# Get descriptives for EOHI variables (full sample, NA handled per stat)
eohi_descriptives <- get_descriptives(df, eohi_vars)
cat("\n=== EOHI Variables Descriptives ===\n")
print(eohi_descriptives)
# Get descriptives for calibration variables
cal_descriptives <- get_descriptives(df, cal_vars)
cat("\n=== Calibration Variables Descriptives ===\n")
print(cal_descriptives)
####==== PEARSON CORRELATIONS ====
# Compute the full correlation matrix with p-values on complete cases.
# Hmisc::rcorr returns $r (coefficients), $P (p-values, NA on diagonal), $n.
cor_results_pearson <- rcorr(as.matrix(df_complete), type = "pearson")
# Extract correlation coefficients
cor_pearson <- cor_results_pearson$r
# Extract p-values
p_matrix_pearson <- cor_results_pearson$P
# Format a correlation matrix as text, appending significance stars
# (*** p<.001, ** p<.01, * p<.05) taken from the matching p-value matrix.
# Values are rounded to 5 decimals; dimnames are carried over unchanged.
corstars <- function(cor_mat, p_mat) {
  # Nested ifelse works elementwise over the whole matrix and keeps
  # NA p-values (e.g. the diagonal from rcorr) as NA star labels.
  star_labels <- ifelse(p_mat < 0.05,
                        ifelse(p_mat < 0.01,
                               ifelse(p_mat < 0.001, "***", "**"),
                               "*"),
                        "")
  cell_text <- paste0(format(round(cor_mat, 5), nsmall = 5), star_labels)
  annotated <- matrix(cell_text, nrow = nrow(cor_mat),
                      dimnames = dimnames(cor_mat))
  annotated
}
# Apply the star-annotation helper and print the full Pearson matrix
cor_table_pearson <- corstars(cor_pearson, p_matrix_pearson)
cat("\n=== PEARSON CORRELATIONS ===\n")
print(cor_table_pearson, quote = FALSE)
# Extract specific correlations between EOHI and calibration variables
eohi_cal_correlations <- cor_pearson[eohi_vars, cal_vars]
eohi_cal_pvalues <- p_matrix_pearson[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Pearson Correlations ===\n")
# Print one line per pair: "var1 x var2: r = ...<stars>, p = ..."
for(i in 1:nrow(eohi_cal_correlations)) {
for(j in 1:ncol(eohi_cal_correlations)) {
cor_val <- eohi_cal_correlations[i, j]
p_val <- eohi_cal_pvalues[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: r = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations)[i],
colnames(eohi_cal_correlations)[j],
cor_val, star, p_val))
}
}
####==== SPEARMAN CORRELATIONS ====
# Compute the rank-based (Spearman) correlation matrix with p-values
cor_results_spearman <- rcorr(as.matrix(df_complete), type = "spearman")
# Extract correlation coefficients
cor_spearman <- cor_results_spearman$r
# Extract p-values (NA on the diagonal)
p_matrix_spearman <- cor_results_spearman$P
# Apply the star-annotation helper defined above
cor_table_spearman <- corstars(cor_spearman, p_matrix_spearman)
cat("\n=== SPEARMAN CORRELATIONS ===\n")
print(cor_table_spearman, quote = FALSE)
# Extract the EOHI x calibration sub-matrix of correlations and p-values
eohi_cal_correlations_spearman <- cor_spearman[eohi_vars, cal_vars]
eohi_cal_pvalues_spearman <- p_matrix_spearman[eohi_vars, cal_vars]
cat("\n=== EOHI x Calibration Spearman Correlations ===\n")
# Print one line per pair: "var1 x var2: rho = ...<stars>, p = ..."
for(i in 1:nrow(eohi_cal_correlations_spearman)) {
for(j in 1:ncol(eohi_cal_correlations_spearman)) {
cor_val <- eohi_cal_correlations_spearman[i, j]
p_val <- eohi_cal_pvalues_spearman[i, j]
star <- ifelse(p_val < 0.001, "***",
ifelse(p_val < 0.01, "**",
ifelse(p_val < 0.05, "*", "")))
cat(sprintf("%s x %s: rho = %.5f%s, p = %.5f\n",
rownames(eohi_cal_correlations_spearman)[i],
colnames(eohi_cal_correlations_spearman)[j],
cor_val, star, p_val))
}
}
####==== BOOTSTRAPPED 95% CONFIDENCE INTERVALS ====
# Correlation between two columns plus a bootstrapped 95% percentile CI.
#
# Args:
#   data:   data frame containing at least columns `var1` and `var2`
#   var1:   name of the first column (character)
#   var2:   name of the second column (character)
#   method: correlation type passed to cor() ("pearson" or "spearman")
#   R:      number of bootstrap resamples
#
# Returns a one-row data.frame: correlation, ci_lower, ci_upper, n.
# CI bounds are NA when fewer than 3 complete cases exist or the
# bootstrap CI cannot be computed (e.g. a zero-variance column).
bootstrap_correlation <- function(data, var1, var2, method = "pearson", R = 1000) {
  # Use only rows where both variables are observed (pairwise deletion)
  complete_data <- data[complete.cases(data[, c(var1, var2)]), ]
  if(nrow(complete_data) < 3) {
    # Too few observations for a meaningful correlation/CI
    return(data.frame(
      correlation = NA,
      ci_lower = NA,
      ci_upper = NA,
      n = nrow(complete_data)
    ))
  }
  # Statistic for boot(): correlation computed on the resampled rows
  boot_fun <- function(data, indices) {
    cor(data[indices, var1], data[indices, var2], method = method, use = "complete.obs")
  }
  set.seed(123) # for reproducibility (note: resets the global RNG state)
  boot_results <- boot(complete_data, boot_fun, R = R)
  # boot.ci() errors or returns a NULL $perc element when the bootstrap
  # distribution is degenerate (all replicates identical, NA statistic);
  # fall back to NA bounds instead of aborting the whole pipeline.
  ci <- tryCatch(boot.ci(boot_results, type = "perc"), error = function(e) NULL)
  if (is.null(ci) || is.null(ci$perc)) {
    ci_lower <- NA
    ci_upper <- NA
  } else {
    # Row layout of $perc: conf level, lower/upper order stats, lower, upper
    ci_lower <- ci$perc[4]
    ci_upper <- ci$perc[5]
  }
  return(data.frame(
    correlation = boot_results$t0,
    ci_lower = ci_lower,
    ci_upper = ci_upper,
    n = nrow(complete_data)
  ))
}
# Compute bootstrap CIs for all EOHI x Calibration correlations
# (pairwise-complete data, R = 1000 resamples per pair)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (PEARSON) ===\n")
bootstrap_results_pearson <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
# One row per (eohi_var, cal_var) pair; pmap_dfr row-binds the results
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "pearson", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "pearson"
return(result)
}) %>%
mutate(
# Round for display/export only
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_pearson)
cat("\n=== BOOTSTRAPPED 95% CONFIDENCE INTERVALS (SPEARMAN) ===\n")
# Same grid of pairs, Spearman method
bootstrap_results_spearman <- expand.grid(
eohi_var = eohi_vars,
cal_var = cal_vars,
stringsAsFactors = FALSE
) %>%
pmap_dfr(function(eohi_var, cal_var) {
result <- bootstrap_correlation(df, eohi_var, cal_var, method = "spearman", R = 1000)
result$eohi_var <- eohi_var
result$cal_var <- cal_var
result$method <- "spearman"
return(result)
}) %>%
mutate(
correlation = round(correlation, 5),
ci_lower = round(ci_lower, 5),
ci_upper = round(ci_upper, 5)
)
print(bootstrap_results_spearman)
####==== SUMMARY TABLE ====
# Create comprehensive summary table: one row per EOHI x calibration pair,
# combining Pearson and Spearman bootstrap estimates, their 95% CIs, and
# the rcorr() p-values computed earlier in the script.
summary_table <- bootstrap_results_pearson %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper, n) %>%
rename(pearson_r = correlation, pearson_ci_lower = ci_lower, pearson_ci_upper = ci_upper) %>%
# Attach the Spearman bootstrap results for the same variable pair
left_join(
bootstrap_results_spearman %>%
select(eohi_var, cal_var, correlation, ci_lower, ci_upper) %>%
rename(spearman_rho = correlation, spearman_ci_lower = ci_lower, spearman_ci_upper = ci_upper),
by = c("eohi_var", "cal_var")
) %>%
# Add p-values looked up by name from the rcorr() p-value matrices
left_join(
expand.grid(eohi_var = eohi_vars, cal_var = cal_vars, stringsAsFactors = FALSE) %>%
pmap_dfr(function(eohi_var, cal_var) {
pearson_p <- p_matrix_pearson[eohi_var, cal_var]
spearman_p <- p_matrix_spearman[eohi_var, cal_var]
data.frame(
eohi_var = eohi_var,
cal_var = cal_var,
pearson_p = pearson_p,
spearman_p = spearman_p
)
}),
by = c("eohi_var", "cal_var")
) %>%
mutate(
# Round p-values for display/export only
pearson_p = round(pearson_p, 5),
spearman_p = round(spearman_p, 5)
)
cat("\n=== COMPREHENSIVE SUMMARY TABLE ===\n")
print(summary_table)
# Save results to CSV in the current working directory (set by setwd above)
write.csv(summary_table, "eohi_calibration_correlations_summary.csv", row.names = FALSE)
cat("\nResults saved to: eohi_calibration_correlations_summary.csv\n")

Some files were not shown because too many files have changed in this diff Show More